1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97
98 #include <net/sockaddr_utils.h>
99
100 #if __has_ptrcheck
101 #include <machine/trap.h>
102 #endif /* __has_ptrcheck */
103
104 #if CONFIG_MACF
105 #include <security/mac_framework.h>
106 #endif /* CONFIG_MACF */
107
108 #include <mach/vm_param.h>
109
110 #ifndef ROUNDUP64
111 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
112 #endif
113
114 #ifndef ADVANCE64
115 #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
116 #endif
117
118 /*
119 * Maximum number of FDs that can be passed in an mbuf
120 */
121 #define UIPC_MAX_CMSG_FD 512
122
123 ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
124 static unp_gen_t unp_gencnt;
125 static u_int unp_count;
126
127 static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
128 static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
129 static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
130
131 static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
132 static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
133 static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
134
135 static u_int disconnect_in_progress;
136
137 static struct unp_head unp_shead, unp_dhead;
138 static int unp_defer;
139 static thread_call_t unp_gc_tcall;
140 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
141
142 SYSCTL_DECL(_net_local);
143
144 static int unp_rights; /* file descriptors in flight */
145 static int unp_disposed; /* discarded file descriptors */
146
147 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
148
149 #define ULEF_CONNECTION 0x01
150 uint32_t unp_log_enable_flags = 0;
151
152 SYSCTL_UINT(_net_local, OID_AUTO, log, CTLFLAG_RD | CTLFLAG_LOCKED,
153 &unp_log_enable_flags, 0, "");
154
155
156 /*
157 * mDNSResponder tracing. When enabled, endpoints connected to
158 * /var/run/mDNSResponder will be traced; during each send on
159 * the traced socket, we log the PID and process name of the
160 * sending process. We also print out a bit of info related
161 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
162 * of mDNSResponder stays the same.
163 */
164 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
165
166 static int unpst_tracemdns; /* enable tracing */
167
168 #define MDNS_IPC_MSG_HDR_VERSION_1 1
169
/*
 * Mirror of the on-the-wire mDNSResponder IPC header (dnssd_ipc.h).
 * Only 'version' and 'op' are inspected here (see the UNP_TRACE_MDNS
 * path in uipc_send); fields are in network byte order on the wire.
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;       /* compared against MDNS_IPC_MSG_HDR_VERSION_1 */
	uint32_t datalen;       /* presumably payload length — not used here */
	uint32_t ipc_flags;
	uint32_t op;            /* request opcode; logged when tracing */
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
181
182 /*
183 * Unix communications domain.
184 *
185 * TODO:
186 * SEQPACKET, RDM
187 * rethink name space problems
188 * need a proper out-of-band
189 * lock pushdown
190 */
191 static struct sockaddr sun_noname = { .sa_len = 3, .sa_family = AF_LOCAL, .sa_data = { 0 } };
192 static ino_t unp_ino; /* prototype for fake inode numbers */
193
194 static int unp_attach(struct socket *);
195 static void unp_detach(struct unpcb *);
196 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
197 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
198 static void unp_disconnect(struct unpcb *);
199 static void unp_shutdown(struct unpcb *);
200 static void unp_drop(struct unpcb *, int);
201 static void unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
202 static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
203 static void unp_mark(struct fileglob *, __unused void *);
204 static void unp_discard(struct fileglob *, void *);
205 static int unp_internalize(struct mbuf *, proc_t);
206 static int unp_listen(struct unpcb *, proc_t);
207 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
208 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
209
/*
 * Allocate the thread call used to run the in-flight file descriptor
 * garbage collector (unp_gc).  Runs once at boot via the STARTUP()
 * registration that follows; THREAD_CALL_OPTIONS_ONCE coalesces
 * repeated enqueues into a single pending invocation.
 */
__startup_func
static void
unp_gc_setup(void)
{
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
218 STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
219
/*
 * Acquire the locks of both endpoints of a connection in canonical
 * order (lower socket address first) to avoid deadlock with a thread
 * locking the same pair from the other side.
 *
 * On entry the caller holds the lock on 'so'; on return both 'so' and
 * 'conn_so' are locked (conn_so with a refcounted lock, see the '1'
 * argument to socket_lock).
 *
 * When 'so' sorts after 'conn_so' we must drop so's lock and re-take
 * the pair in order.  While so is unlocked, UNP_DONTDISCONNECT plus
 * rw_thrcount mark the pcb so a concurrent disconnect will hold off —
 * presumably honored by unp_disconnect(); the last thread to finish
 * clears the flag and wakes any waiter sleeping on the pcb.
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
241
242 static int
uipc_abort(struct socket * so)243 uipc_abort(struct socket *so)
244 {
245 struct unpcb *unp = sotounpcb(so);
246
247 if (unp == 0) {
248 return EINVAL;
249 }
250 unp_drop(unp, ECONNABORTED);
251 unp_detach(unp);
252 sofree(so);
253 return 0;
254 }
255
256 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)257 uipc_accept(struct socket *so, struct sockaddr **nam)
258 {
259 struct unpcb *unp = sotounpcb(so);
260
261 if (unp == 0) {
262 return EINVAL;
263 }
264
265 /*
266 * Pass back name of connected socket,
267 * if it was bound and we are still connected
268 * (our peer may have closed already!).
269 */
270 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
271 *nam = dup_sockaddr((struct sockaddr *)
272 unp->unp_conn->unp_addr, 1);
273 } else {
274 if (unp_log_enable_flags & ULEF_CONNECTION) {
275 os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
276 __func__, unp->unp_gencnt);
277 }
278 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
279 }
280 return 0;
281 }
282
283 /*
284 * Returns: 0 Success
285 * EISCONN
286 * unp_attach:
287 */
288 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)289 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
290 {
291 struct unpcb *unp = sotounpcb(so);
292
293 if (unp != 0) {
294 return EISCONN;
295 }
296 return unp_attach(so);
297 }
298
299 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)300 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
301 {
302 struct unpcb *unp = sotounpcb(so);
303
304 if (unp == 0) {
305 return EINVAL;
306 }
307
308 return unp_bind(unp, nam, p);
309 }
310
311 /*
312 * Returns: 0 Success
313 * EINVAL
314 * unp_connect:??? [See elsewhere in this file]
315 */
316 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)317 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
318 {
319 struct unpcb *unp = sotounpcb(so);
320
321 if (unp == 0) {
322 return EINVAL;
323 }
324 return unp_connect(so, nam, p);
325 }
326
327 /*
328 * Returns: 0 Success
329 * EINVAL
330 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
331 * unp_connect2:EINVAL Invalid argument
332 */
333 static int
uipc_connect2(struct socket * so1,struct socket * so2)334 uipc_connect2(struct socket *so1, struct socket *so2)
335 {
336 struct unpcb *unp = sotounpcb(so1);
337
338 if (unp == 0) {
339 return EINVAL;
340 }
341
342 return unp_connect2(so1, so2);
343 }
344
345 /* control is EOPNOTSUPP */
346
347 static int
uipc_detach(struct socket * so)348 uipc_detach(struct socket *so)
349 {
350 struct unpcb *unp = sotounpcb(so);
351
352 if (unp == 0) {
353 return EINVAL;
354 }
355
356 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
357 unp_detach(unp);
358 return 0;
359 }
360
361 static int
uipc_disconnect(struct socket * so)362 uipc_disconnect(struct socket *so)
363 {
364 struct unpcb *unp = sotounpcb(so);
365
366 if (unp == 0) {
367 return EINVAL;
368 }
369 unp_disconnect(unp);
370 return 0;
371 }
372
373 /*
374 * Returns: 0 Success
375 * EINVAL
376 */
377 static int
uipc_listen(struct socket * so,__unused proc_t p)378 uipc_listen(struct socket *so, __unused proc_t p)
379 {
380 struct unpcb *unp = sotounpcb(so);
381
382 if (unp == 0 || unp->unp_vnode == 0) {
383 return EINVAL;
384 }
385 return unp_listen(unp, p);
386 }
387
/*
 * Return the bound name of the connected peer, or the unnamed address
 * if there is no peer or the peer never bound.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
	/*
	 * Take the peer's lock (in canonical order) so its address can't
	 * be torn down while we copy it out.
	 */
	so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
	if (so2 != NULL) {
		unp_get_locks_in_order(so, so2);
	}

	/*
	 * Re-check unp_conn: unp_get_locks_in_order() may have dropped
	 * so's lock, so the peer could have disconnected meanwhile.
	 */
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr((struct sockaddr *)
		    unp->unp_conn->unp_addr, 1);
	} else {
		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
	}
	if (so2 != NULL) {
		socket_unlock(so2, 1);
	}
	return 0;
}
413
/*
 * pru_rcvd: called after the receiver consumed data from so_rcv, to
 * relieve backpressure on the connected sender.  Only meaningful for
 * SOCK_STREAM; this entry point should never be reached for datagram
 * sockets, hence the panic.
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		return EINVAL;
	}
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
	/*NOTREACHED*/

	case SOCK_STREAM:
		/* Local aliases: rcv = our receive buffer, snd = peer's send buffer. */
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 *
		 * Credit the peer's send limits by however much our receive
		 * buffer has drained since the last snapshot (unp_mbcnt /
		 * unp_cc), then record the new levels in the pcb.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
460
461 /* pru_rcvoob is EOPNOTSUPP */
462
463 /*
464 * Returns: 0 Success
465 * EINVAL
466 * EOPNOTSUPP
467 * EPIPE
468 * ENOTCONN
469 * EISCONN
470 * unp_internalize:EINVAL
471 * unp_internalize:EBADF
472 * unp_connect:EAFNOSUPPORT Address family not supported
473 * unp_connect:EINVAL Invalid argument
474 * unp_connect:ENOTSOCK Not a socket
475 * unp_connect:ECONNREFUSED Connection refused
476 * unp_connect:EISCONN Socket is connected
477 * unp_connect:EPROTOTYPE Protocol wrong type for socket
478 * unp_connect:???
479 * sbappendaddr:ENOBUFS [5th argument, contents modified]
480 * sbappendaddr:??? [whatever a filter author chooses]
481 */
/*
 * pru_send: deliver mbuf chain 'm' (plus optional 'control' message,
 * which may carry internalized file descriptors) straight into the
 * peer's receive buffer.
 *
 * Ownership rules: on success the appended 'm'/'control' belong to the
 * receiver (we NULL our references); anything still non-NULL at
 * 'release' is freed here.  If a control message was internalized but
 * an error occurred, unp_dispose() undoes the internalization first.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int32_t len = m_pktlen(m);

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		/* Out-of-band data is not supported on AF_UNIX. */
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* Temporarily connect to the named destination. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/* so == so2 when a dgram socket is connected to itself. */
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = (struct sockaddr *)unp->unp_addr;
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/* sbappendaddr() consumed (or freed) m in every case. */
		m = NULL;
		if (nam) {
			/* Undo the temporary connect done above. */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
		/* Local aliases: rcv = peer's receive buffer, snd = our send buffer. */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break; /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		if (unp->unp_flags & UNP_TRACE_MDNS) {
			/* Tracing enabled: log pid/opcode of mDNSResponder requests. */
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure. Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full. It is
		 * possible, however, that append might fail. In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		/* Shrink our send limits by what the peer's buffer grew. */
		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		/* Clamp sb_hiwat at 0 rather than letting it wrap negative. */
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	so->so_tc_stats[0].txpackets += 1;
	so->so_tc_stats[0].txbytes += len;

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0) {
		/* Undo internalization (release fd references) before freeing. */
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
700
701 static int
uipc_sense(struct socket * so,void * ub,int isstat64)702 uipc_sense(struct socket *so, void *ub, int isstat64)
703 {
704 struct unpcb *unp = sotounpcb(so);
705 struct socket *so2;
706 blksize_t blksize;
707
708 if (unp == 0) {
709 return EINVAL;
710 }
711
712 blksize = so->so_snd.sb_hiwat;
713 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
714 so2 = unp->unp_conn->unp_socket;
715 blksize += so2->so_rcv.sb_cc;
716 }
717 if (unp->unp_ino == 0) {
718 unp->unp_ino = unp_ino++;
719 }
720
721 if (isstat64 != 0) {
722 struct stat64 *sb64;
723
724 sb64 = (struct stat64 *)ub;
725 sb64->st_blksize = blksize;
726 sb64->st_dev = NODEV;
727 sb64->st_ino = (ino64_t)unp->unp_ino;
728 } else {
729 struct stat *sb;
730
731 sb = (struct stat *)ub;
732 sb->st_blksize = blksize;
733 sb->st_dev = NODEV;
734 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
735 }
736
737 return 0;
738 }
739
740 /*
741 * Returns: 0 Success
742 * EINVAL
743 *
744 * Notes: This is not strictly correct, as unp_shutdown() also calls
745 * socantrcvmore(). These should maybe both be conditionalized
746 * on the 'how' argument in soshutdown() as called from the
747 * shutdown() system call.
748 */
749 static int
uipc_shutdown(struct socket * so)750 uipc_shutdown(struct socket *so)
751 {
752 struct unpcb *unp = sotounpcb(so);
753
754 if (unp == 0) {
755 return EINVAL;
756 }
757 socantsendmore(so);
758 unp_shutdown(unp);
759 return 0;
760 }
761
762 /*
763 * Returns: 0 Success
764 * EINVAL Invalid argument
765 */
766 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)767 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
768 {
769 struct unpcb *unp = sotounpcb(so);
770
771 if (unp == NULL) {
772 return EINVAL;
773 }
774 if (unp->unp_addr != NULL) {
775 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
776 } else {
777 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
778 }
779 return 0;
780 }
781
/*
 * Protocol user-request switch for the unix (local) domain.
 * sosend/soreceive are the generic socket-layer entry points; every
 * other request dispatches to the uipc_* handlers above.
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = uipc_abort,
	.pru_accept = uipc_accept,
	.pru_attach = uipc_attach,
	.pru_bind = uipc_bind,
	.pru_connect = uipc_connect,
	.pru_connect2 = uipc_connect2,
	.pru_detach = uipc_detach,
	.pru_disconnect = uipc_disconnect,
	.pru_listen = uipc_listen,
	.pru_peeraddr = uipc_peeraddr,
	.pru_rcvd = uipc_rcvd,
	.pru_send = uipc_send,
	.pru_sense = uipc_sense,
	.pru_shutdown = uipc_shutdown,
	.pru_sockaddr = uipc_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive,
};
801
802 int
uipc_ctloutput(struct socket * so,struct sockopt * sopt)803 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
804 {
805 struct unpcb *unp = sotounpcb(so);
806 int error = 0;
807 pid_t peerpid;
808 proc_t p;
809 task_t t __single;
810 struct socket *peerso;
811
812 switch (sopt->sopt_dir) {
813 case SOPT_GET:
814 switch (sopt->sopt_name) {
815 case LOCAL_PEERCRED:
816 if (unp->unp_flags & UNP_HAVEPC) {
817 error = sooptcopyout(sopt, &unp->unp_peercred,
818 sizeof(unp->unp_peercred));
819 } else {
820 if (so->so_type == SOCK_STREAM) {
821 error = ENOTCONN;
822 } else {
823 error = EINVAL;
824 }
825 }
826 break;
827 case LOCAL_PEERPID:
828 case LOCAL_PEEREPID:
829 if (unp->unp_conn == NULL) {
830 error = ENOTCONN;
831 break;
832 }
833 peerso = unp->unp_conn->unp_socket;
834 if (peerso == NULL) {
835 panic("peer is connected but has no socket?");
836 }
837 unp_get_locks_in_order(so, peerso);
838 if (sopt->sopt_name == LOCAL_PEEREPID &&
839 peerso->so_flags & SOF_DELEGATED) {
840 peerpid = peerso->e_pid;
841 } else {
842 peerpid = peerso->last_pid;
843 }
844 socket_unlock(peerso, 1);
845 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
846 break;
847 case LOCAL_PEERUUID:
848 case LOCAL_PEEREUUID:
849 if (unp->unp_conn == NULL) {
850 error = ENOTCONN;
851 break;
852 }
853 peerso = unp->unp_conn->unp_socket;
854 if (peerso == NULL) {
855 panic("peer is connected but has no socket?");
856 }
857 unp_get_locks_in_order(so, peerso);
858 if (sopt->sopt_name == LOCAL_PEEREUUID &&
859 peerso->so_flags & SOF_DELEGATED) {
860 error = sooptcopyout(sopt, &peerso->e_uuid,
861 sizeof(peerso->e_uuid));
862 } else {
863 error = sooptcopyout(sopt, &peerso->last_uuid,
864 sizeof(peerso->last_uuid));
865 }
866 socket_unlock(peerso, 1);
867 break;
868 case LOCAL_PEERTOKEN:
869 if (unp->unp_conn == NULL) {
870 error = ENOTCONN;
871 break;
872 }
873 peerso = unp->unp_conn->unp_socket;
874 if (peerso == NULL) {
875 panic("peer is connected but has no socket?");
876 }
877 unp_get_locks_in_order(so, peerso);
878 peerpid = peerso->last_pid;
879 p = proc_find(peerpid);
880 if (p != PROC_NULL) {
881 t = proc_task(p);
882 if (t != TASK_NULL) {
883 audit_token_t peertoken;
884 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
885 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
886 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
887 } else {
888 error = EINVAL;
889 }
890 } else {
891 error = EINVAL;
892 }
893 proc_rele(p);
894 } else {
895 error = EINVAL;
896 }
897 socket_unlock(peerso, 1);
898 break;
899 default:
900 error = EOPNOTSUPP;
901 break;
902 }
903 break;
904 case SOPT_SET:
905 default:
906 error = EOPNOTSUPP;
907 break;
908 }
909
910 return error;
911 }
912
913 /*
914 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
915 * for stream sockets, although the total for sender and receiver is
916 * actually only PIPSIZ.
917 * Datagram sockets really use the sendspace as the maximum datagram size,
918 * and don't really want to reserve the sendspace. Their recvspace should
919 * be large enough for at least one max-size datagram plus address.
920 */
921 #ifndef PIPSIZ
922 #define PIPSIZ 8192
923 #endif
924 static u_int32_t unpst_sendspace = PIPSIZ;
925 static u_int32_t unpst_recvspace = PIPSIZ;
926 static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
927 static u_int32_t unpdg_recvspace = 4 * 1024;
928
929 SYSCTL_DECL(_net_local_stream);
930 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
931 &unpst_sendspace, 0, "");
932 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
933 &unpst_recvspace, 0, "");
934 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
935 &unpst_tracemdns, 0, "");
936 SYSCTL_DECL(_net_local_dgram);
937 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
938 &unpdg_sendspace, 0, "");
939 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
940 &unpdg_recvspace, 0, "");
941
942 /*
943 * Returns: 0 Success
944 * ENOBUFS
945 * soreserve:ENOBUFS
946 */
/* Allocate and initialize the unix-domain pcb for a newly created socket. */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	/* Reserve default buffer space unless the caller already set it. */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			/*
			 * By default soreserve() will set the low water
			 * mark to MCLBYTES which is too high given our
			 * default sendspace. Override it here to something
			 * sensible.
			 */
			so->so_snd.sb_lowat = 1;
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Publish the pcb on the global stream or datagram list. */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
1012
/*
 * Tear down a unix-domain pcb: unlink it from the global list, detach
 * any bound vnode, disconnect from the peer, and reset every datagram
 * socket still connected to us.  Called with the socket locked; the
 * lock may be dropped and retaken while coordinating with concurrent
 * disconnects.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the vnode may have been detached while unlocked. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp); /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		/* Wait out any in-flight disconnect, then claim the slot. */
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		/* Release the slot and wake anyone waiting to disconnect. */
		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1092
1093 /*
1094 * Returns: 0 Success
1095 * EAFNOSUPPORT
1096 * EINVAL
1097 * EADDRINUSE
1098 * namei:??? [anything namei can return]
1099 * vnode_authorize:??? [anything vnode_authorize can return]
1100 *
1101 * Notes: p at this point is the current process, as this function is
1102 * only called by sobind().
1103 */
1104 static int
unp_bind(struct unpcb * unp,struct sockaddr * nam,proc_t p)1105 unp_bind(
1106 struct unpcb *unp,
1107 struct sockaddr *nam,
1108 proc_t p)
1109 {
1110 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1111 struct vnode *vp __single, *dvp;
1112 struct vnode_attr va;
1113 vfs_context_t ctx = vfs_context_current();
1114 int error, namelen;
1115 struct nameidata nd;
1116 struct socket *so = unp->unp_socket;
1117 char buf[SOCK_MAXADDRLEN];
1118
1119 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1120 return EAFNOSUPPORT;
1121 }
1122
1123 /*
1124 * Check if the socket is already bound to an address
1125 */
1126 if (unp->unp_vnode != NULL) {
1127 return EINVAL;
1128 }
1129 /*
1130 * Check if the socket may have been shut down
1131 */
1132 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1133 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1134 return EINVAL;
1135 }
1136
1137 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
1138 if (namelen <= 0) {
1139 return EINVAL;
1140 }
1141 /*
1142 * Note: sun_path is not a zero terminated "C" string
1143 */
1144 if (namelen >= SOCK_MAXADDRLEN) {
1145 return EINVAL;
1146 }
1147 bcopy(soun->sun_path, buf, namelen);
1148 buf[namelen] = 0;
1149
1150 socket_unlock(so, 0);
1151
1152 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1153 CAST_USER_ADDR_T(buf), ctx);
1154 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1155 error = namei(&nd);
1156 if (error) {
1157 socket_lock(so, 0);
1158 return error;
1159 }
1160 dvp = nd.ni_dvp;
1161 vp = nd.ni_vp;
1162
1163 if (vp != NULL) {
1164 /*
1165 * need to do this before the vnode_put of dvp
1166 * since we may have to release an fs_nodelock
1167 */
1168 nameidone(&nd);
1169
1170 vnode_put(dvp);
1171 vnode_put(vp);
1172
1173 socket_lock(so, 0);
1174 return EADDRINUSE;
1175 }
1176
1177 VATTR_INIT(&va);
1178 VATTR_SET(&va, va_type, VSOCK);
1179 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));
1180
1181 #if CONFIG_MACF
1182 error = mac_vnode_check_create(ctx,
1183 nd.ni_dvp, &nd.ni_cnd, &va);
1184
1185 if (error == 0)
1186 #endif /* CONFIG_MACF */
1187 #if CONFIG_MACF_SOCKET_SUBSET
1188 error = mac_vnode_check_uipc_bind(ctx,
1189 nd.ni_dvp, &nd.ni_cnd, &va);
1190
1191 if (error == 0)
1192 #endif /* MAC_SOCKET_SUBSET */
1193 /* authorize before creating */
1194 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1195
1196 if (!error) {
1197 /* create the socket */
1198 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1199 }
1200
1201 nameidone(&nd);
1202 vnode_put(dvp);
1203
1204 if (error) {
1205 socket_lock(so, 0);
1206 return error;
1207 }
1208
1209 socket_lock(so, 0);
1210
1211 if (unp->unp_vnode != NULL) {
1212 vnode_put(vp); /* drop the iocount */
1213 return EINVAL;
1214 }
1215
1216 error = vnode_ref(vp); /* gain a longterm reference */
1217 if (error) {
1218 vnode_put(vp); /* drop the iocount */
1219 return error;
1220 }
1221
1222 vp->v_socket = unp->unp_socket;
1223 unp->unp_vnode = vp;
1224 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1225 vnode_put(vp); /* drop the iocount */
1226
1227 return 0;
1228 }
1229
1230
1231 /*
1232 * Returns: 0 Success
1233 * EAFNOSUPPORT Address family not supported
1234 * EINVAL Invalid argument
1235 * ENOTSOCK Not a socket
1236 * ECONNREFUSED Connection refused
1237 * EPROTOTYPE Protocol wrong type for socket
1238 * EISCONN Socket is connected
1239 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1240 * unp_connect2:EINVAL Invalid argument
1241 * namei:??? [anything namei can return]
1242 * vnode_authorize:???? [anything vnode_authorize can return]
1243 *
1244 * Notes: p at this point is the current process, as this function is
1245 * only called by sosend(), sendfile(), and soconnectlock().
1246 */
1247 static int
unp_connect(struct socket * so,struct sockaddr * nam,__unused proc_t p)1248 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1249 {
1250 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1251 struct vnode *vp;
1252 struct socket *so2, *so3, *list_so = NULL;
1253 struct unpcb *unp, *unp2, *unp3;
1254 vfs_context_t ctx = vfs_context_current();
1255 int error, len;
1256 struct nameidata nd;
1257 char buf[SOCK_MAXADDRLEN];
1258
1259 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1260 return EAFNOSUPPORT;
1261 }
1262
1263 unp = sotounpcb(so);
1264 so2 = so3 = NULL;
1265
1266 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1267 if (len <= 0) {
1268 return EINVAL;
1269 }
1270 /*
1271 * Note: sun_path is not a zero terminated "C" string
1272 */
1273 if (len >= SOCK_MAXADDRLEN) {
1274 return EINVAL;
1275 }
1276
1277 soisconnecting(so);
1278
1279 bcopy(soun->sun_path, buf, len);
1280 buf[len] = 0;
1281
1282 socket_unlock(so, 0);
1283
1284 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1285 CAST_USER_ADDR_T(buf), ctx);
1286 error = namei(&nd);
1287 if (error) {
1288 socket_lock(so, 0);
1289 return error;
1290 }
1291 nameidone(&nd);
1292 vp = nd.ni_vp;
1293 if (vp->v_type != VSOCK) {
1294 error = ENOTSOCK;
1295 socket_lock(so, 0);
1296 goto out;
1297 }
1298
1299 #if CONFIG_MACF_SOCKET_SUBSET
1300 error = mac_vnode_check_uipc_connect(ctx, vp, so);
1301 if (error) {
1302 socket_lock(so, 0);
1303 goto out;
1304 }
1305 #endif /* MAC_SOCKET_SUBSET */
1306
1307 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1308 if (error) {
1309 socket_lock(so, 0);
1310 goto out;
1311 }
1312
1313 lck_mtx_lock(&unp_connect_lock);
1314
1315 if (vp->v_socket == 0) {
1316 lck_mtx_unlock(&unp_connect_lock);
1317 error = ECONNREFUSED;
1318 socket_lock(so, 0);
1319 goto out;
1320 }
1321
1322 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1323 so2 = vp->v_socket;
1324 lck_mtx_unlock(&unp_connect_lock);
1325
1326
1327 if (so2->so_pcb == NULL) {
1328 error = ECONNREFUSED;
1329 if (so != so2) {
1330 socket_unlock(so2, 1);
1331 socket_lock(so, 0);
1332 } else {
1333 /* Release the reference held for the listen socket */
1334 VERIFY(so2->so_usecount > 0);
1335 so2->so_usecount--;
1336 }
1337 goto out;
1338 }
1339
1340 if (so < so2) {
1341 socket_unlock(so2, 0);
1342 socket_lock(so, 0);
1343 socket_lock(so2, 0);
1344 } else if (so > so2) {
1345 socket_lock(so, 0);
1346 }
1347 /*
1348 * Check if socket was connected while we were trying to
1349 * get the socket locks in order.
1350 * XXX - probably shouldn't return an error for SOCK_DGRAM
1351 */
1352 if ((so->so_state & SS_ISCONNECTED) != 0) {
1353 error = EISCONN;
1354 goto decref_out;
1355 }
1356
1357 if (so->so_type != so2->so_type) {
1358 error = EPROTOTYPE;
1359 goto decref_out;
1360 }
1361
1362 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1363 /* Release the incoming socket but keep a reference */
1364 socket_unlock(so, 0);
1365
1366 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1367 (so3 = sonewconn(so2, 0, nam)) == 0) {
1368 error = ECONNREFUSED;
1369 if (so != so2) {
1370 socket_unlock(so2, 1);
1371 socket_lock(so, 0);
1372 } else {
1373 socket_lock(so, 0);
1374 /* Release the reference held for
1375 * listen socket.
1376 */
1377 VERIFY(so2->so_usecount > 0);
1378 so2->so_usecount--;
1379 }
1380 goto out;
1381 }
1382 unp2 = sotounpcb(so2);
1383 unp3 = sotounpcb(so3);
1384 if (unp2->unp_addr) {
1385 unp3->unp_addr = (struct sockaddr_un *)
1386 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1387 }
1388
1389 /*
1390 * unp_peercred management:
1391 *
1392 * The connecter's (client's) credentials are copied
1393 * from its process structure at the time of connect()
1394 * (which is now).
1395 */
1396 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1397 unp3->unp_flags |= UNP_HAVEPC;
1398 /*
1399 * The receiver's (server's) credentials are copied
1400 * from the unp_peercred member of socket on which the
1401 * former called listen(); unp_listen() cached that
1402 * process's credentials at that time so we can use
1403 * them now.
1404 */
1405 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1406 ("unp_connect: listener without cached peercred"));
1407
1408 /* Here we need to have both so and so2 locks and so2
1409 * is already locked. Lock ordering is required.
1410 */
1411 if (so < so2) {
1412 socket_unlock(so2, 0);
1413 socket_lock(so, 0);
1414 socket_lock(so2, 0);
1415 } else {
1416 socket_lock(so, 0);
1417 }
1418
1419 /* Check again if the socket state changed when its lock was released */
1420 if ((so->so_state & SS_ISCONNECTED) != 0) {
1421 error = EISCONN;
1422 socket_unlock(so2, 1);
1423 socket_lock(so3, 0);
1424 sofreelastref(so3, 1);
1425 goto out;
1426 }
1427 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1428 sizeof(unp->unp_peercred));
1429 unp->unp_flags |= UNP_HAVEPC;
1430
1431 /* Hold the reference on listening socket until the end */
1432 socket_unlock(so2, 0);
1433 list_so = so2;
1434
1435 /* Lock ordering doesn't matter because so3 was just created */
1436 socket_lock(so3, 1);
1437 so2 = so3;
1438
1439 /*
1440 * Enable tracing for mDNSResponder endpoints. (The use
1441 * of sizeof instead of strlen below takes the null
1442 * terminating character into account.)
1443 */
1444 if (unpst_tracemdns &&
1445 !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1446 sizeof(MDNSRESPONDER_PATH))) {
1447 unp->unp_flags |= UNP_TRACE_MDNS;
1448 unp2->unp_flags |= UNP_TRACE_MDNS;
1449 }
1450 }
1451
1452 error = unp_connect2(so, so2);
1453
1454 decref_out:
1455 if (so2 != NULL) {
1456 if (so != so2) {
1457 socket_unlock(so2, 1);
1458 } else {
1459 /* Release the extra reference held for the listen socket.
1460 * This is possible only for SOCK_DGRAM sockets. We refuse
1461 * connecting to the same socket for SOCK_STREAM sockets.
1462 */
1463 VERIFY(so2->so_usecount > 0);
1464 so2->so_usecount--;
1465 }
1466 }
1467
1468 if (list_so != NULL) {
1469 socket_lock(list_so, 0);
1470 socket_unlock(list_so, 1);
1471 }
1472
1473 out:
1474 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1475 vnode_put(vp);
1476 return error;
1477 }
1478
1479 /*
1480 * Returns: 0 Success
1481 * EPROTOTYPE Protocol wrong type for socket
1482 * EINVAL Invalid argument
1483 */
1484 int
unp_connect2(struct socket * so,struct socket * so2)1485 unp_connect2(struct socket *so, struct socket *so2)
1486 {
1487 struct unpcb *unp = sotounpcb(so);
1488 struct unpcb *unp2;
1489
1490 if (so2->so_type != so->so_type) {
1491 return EPROTOTYPE;
1492 }
1493
1494 unp2 = sotounpcb(so2);
1495
1496 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1497 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1498
1499 /* Verify both sockets are still opened */
1500 if (unp == 0 || unp2 == 0) {
1501 return EINVAL;
1502 }
1503
1504 unp->unp_conn = unp2;
1505 so2->so_usecount++;
1506
1507 switch (so->so_type) {
1508 case SOCK_DGRAM:
1509 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1510
1511 if (so != so2) {
1512 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1513 /* Keep an extra reference on so2 that will be dropped
1514 * soon after getting the locks in order
1515 */
1516 socket_unlock(so2, 0);
1517 soisconnected(so);
1518 unp_get_locks_in_order(so, so2);
1519 VERIFY(so2->so_usecount > 0);
1520 so2->so_usecount--;
1521 } else {
1522 soisconnected(so);
1523 }
1524
1525 break;
1526
1527 case SOCK_STREAM:
1528 /* This takes care of socketpair */
1529 if (!(unp->unp_flags & UNP_HAVEPC) &&
1530 !(unp2->unp_flags & UNP_HAVEPC)) {
1531 cru2x(kauth_cred_get(), &unp->unp_peercred);
1532 unp->unp_flags |= UNP_HAVEPC;
1533
1534 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1535 unp2->unp_flags |= UNP_HAVEPC;
1536 }
1537 unp2->unp_conn = unp;
1538 so->so_usecount++;
1539
1540 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1541 socket_unlock(so, 0);
1542 soisconnected(so2);
1543
1544 /* Keep an extra reference on so2, that will be dropped soon after
1545 * getting the locks in order again.
1546 */
1547 socket_unlock(so2, 0);
1548
1549 socket_lock(so, 0);
1550 soisconnected(so);
1551
1552 unp_get_locks_in_order(so, so2);
1553 /* Decrement the extra reference left before */
1554 VERIFY(so2->so_usecount > 0);
1555 so2->so_usecount--;
1556 break;
1557
1558 default:
1559 panic("unknown socket type %d in unp_connect2", so->so_type);
1560 }
1561 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1562 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1563 return 0;
1564 }
1565
1566 static void
unp_disconnect(struct unpcb * unp)1567 unp_disconnect(struct unpcb *unp)
1568 {
1569 struct unpcb *unp2 = NULL;
1570 struct socket *so2 = NULL, *so;
1571 struct socket *waitso;
1572 int so_locked = 1, strdisconn = 0;
1573
1574 so = unp->unp_socket;
1575 if (unp->unp_conn == NULL) {
1576 return;
1577 }
1578 lck_mtx_lock(&unp_disconnect_lock);
1579 while (disconnect_in_progress != 0) {
1580 if (so_locked == 1) {
1581 socket_unlock(so, 0);
1582 so_locked = 0;
1583 }
1584 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
1585 PSOCK, "disconnect", NULL);
1586 }
1587 disconnect_in_progress = 1;
1588 lck_mtx_unlock(&unp_disconnect_lock);
1589
1590 if (so_locked == 0) {
1591 socket_lock(so, 0);
1592 so_locked = 1;
1593 }
1594
1595 unp2 = unp->unp_conn;
1596
1597 if (unp2 == 0 || unp2->unp_socket == NULL) {
1598 goto out;
1599 }
1600 so2 = unp2->unp_socket;
1601
1602 try_again:
1603 if (so == so2) {
1604 if (so_locked == 0) {
1605 socket_lock(so, 0);
1606 }
1607 waitso = so;
1608 } else if (so < so2) {
1609 if (so_locked == 0) {
1610 socket_lock(so, 0);
1611 }
1612 socket_lock(so2, 1);
1613 waitso = so2;
1614 } else {
1615 if (so_locked == 1) {
1616 socket_unlock(so, 0);
1617 }
1618 socket_lock(so2, 1);
1619 socket_lock(so, 0);
1620 waitso = so;
1621 }
1622 so_locked = 1;
1623
1624 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1625 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1626
1627 /* Check for the UNP_DONTDISCONNECT flag, if it
1628 * is set, release both sockets and go to sleep
1629 */
1630
1631 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1632 if (so != so2) {
1633 socket_unlock(so2, 1);
1634 }
1635 so_locked = 0;
1636
1637 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1638 PSOCK | PDROP, "unpdisconnect", NULL);
1639 goto try_again;
1640 }
1641
1642 if (unp->unp_conn == NULL) {
1643 panic("unp_conn became NULL after sleep");
1644 }
1645
1646 unp->unp_conn = NULL;
1647 VERIFY(so2->so_usecount > 0);
1648 so2->so_usecount--;
1649
1650 if (unp->unp_flags & UNP_TRACE_MDNS) {
1651 unp->unp_flags &= ~UNP_TRACE_MDNS;
1652 }
1653
1654 switch (unp->unp_socket->so_type) {
1655 case SOCK_DGRAM:
1656 LIST_REMOVE(unp, unp_reflink);
1657 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1658 if (so != so2) {
1659 socket_unlock(so2, 1);
1660 }
1661 break;
1662
1663 case SOCK_STREAM:
1664 unp2->unp_conn = NULL;
1665 VERIFY(so->so_usecount > 0);
1666 so->so_usecount--;
1667
1668 /*
1669 * Set the socket state correctly but do a wakeup later when
1670 * we release all locks except the socket lock, this will avoid
1671 * a deadlock.
1672 */
1673 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1674 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1675
1676 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1677 unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1678
1679 if (unp2->unp_flags & UNP_TRACE_MDNS) {
1680 unp2->unp_flags &= ~UNP_TRACE_MDNS;
1681 }
1682
1683 strdisconn = 1;
1684 break;
1685 default:
1686 panic("unknown socket type %d", so->so_type);
1687 }
1688 out:
1689 lck_mtx_lock(&unp_disconnect_lock);
1690 disconnect_in_progress = 0;
1691 wakeup(&disconnect_in_progress);
1692 lck_mtx_unlock(&unp_disconnect_lock);
1693
1694 if (strdisconn) {
1695 socket_unlock(so, 0);
1696 soisdisconnected(so2);
1697 socket_unlock(so2, 1);
1698
1699 socket_lock(so, 0);
1700 soisdisconnected(so);
1701 }
1702 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1703 return;
1704 }
1705
1706 /*
1707 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1708 * The unpcb_compat data structure is passed to user space and must not change.
1709 */
1710 static void
unpcb_to_compat(struct unpcb * up,struct unpcb_compat * cp)1711 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1712 {
1713 #if defined(__LP64__)
1714 cp->unp_link.le_next = (u_int32_t)
1715 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1716 cp->unp_link.le_prev = (u_int32_t)
1717 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1718 #else
1719 cp->unp_link.le_next = (struct unpcb_compat *)
1720 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1721 cp->unp_link.le_prev = (struct unpcb_compat **)
1722 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1723 #endif
1724 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1725 VM_KERNEL_ADDRPERM(up->unp_socket);
1726 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1727 VM_KERNEL_ADDRPERM(up->unp_vnode);
1728 cp->unp_ino = up->unp_ino;
1729 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1730 VM_KERNEL_ADDRPERM(up->unp_conn);
1731 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1732 #if defined(__LP64__)
1733 cp->unp_reflink.le_next =
1734 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1735 cp->unp_reflink.le_prev =
1736 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1737 #else
1738 cp->unp_reflink.le_next =
1739 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1740 cp->unp_reflink.le_prev =
1741 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1742 #endif
1743 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1744 VM_KERNEL_ADDRPERM(up->unp_addr);
1745 cp->unp_cc = up->unp_cc;
1746 cp->unp_mbcnt = up->unp_mbcnt;
1747 cp->unp_gencnt = up->unp_gencnt;
1748 }
1749
/*
 * sysctl handler exporting the list of UNIX-domain pcbs (legacy xunpcb
 * layout).  Read-only; holds unp_list_mtx shared for the duration.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list __bidi_indexable;
	size_t unp_list_len;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	/* arg1 selects the datagram vs. stream pcb list (see SYSCTL_PROC args) */
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* size probe: report an over-estimate (n + n/8 entries) */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		/* this sysctl is read-only */
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* snapshot pcbs that existed at or before the generation we reported */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;          /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRPERM(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				SOCKADDR_COPY(unp->unp_addr, &xu.xu_au);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				SOCKADDR_COPY(unp->unp_conn->unp_addr, &xu.xu_cau);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1865
/*
 * sysctl arg1 values: the socket type constant is passed through the
 * opaque arg1 pointer (compared via (intptr_t)arg1 in the handlers);
 * __unsafe_forge_single marks that these are not real pointers.
 */
const caddr_t SYSCTL_SOCK_DGRAM_ARG = __unsafe_forge_single(caddr_t, SOCK_DGRAM);
const caddr_t SYSCTL_SOCK_STREAM_ARG = __unsafe_forge_single(caddr_t, SOCK_STREAM);

/* net.local.dgram.pcblist / net.local.stream.pcblist (legacy xunpcb format) */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1877
1878 #if XNU_TARGET_OS_OSX
1879
/*
 * sysctl handler exporting the UNIX-domain pcb list in the 64-bit
 * xunpcb64 layout (macOS only).  Same structure as unp_pcblist above:
 * probe, snapshot under unp_list_mtx, emit, then report final gencnt.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	/* arg1 selects the datagram vs. stream pcb list */
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* size probe: report an over-estimate (n + n/8 entries) */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		/* this sysctl is read-only */
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* snapshot pcbs that existed at or before the generation we reported */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;          /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			/* all kernel pointers are scrambled before export */
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 *
			 * NOTE(review): these bcopy calls use sun_len as the
			 * length with no clamp to the destination size,
			 * unlike the SOCKADDR_COPY used in unp_pcblist;
			 * presumably safe because sun_len was validated
			 * (< SOCK_MAXADDRLEN) at bind/connect — confirm that
			 * xu_au/xu_cau are sized accordingly.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
2021
/* net.local.dgram.pcblist64 / net.local.stream.pcblist64 (xunpcb64 format) */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
2030
2031 #endif /* XNU_TARGET_OS_OSX */
2032
/*
 * sysctl handler exporting the UNIX-domain pcb list in the extensible
 * "_n" layout: each entry is a packed sequence of xunpcb_n, xsocket_n,
 * two xsockbuf_n (rcv, snd) and xsockstat_n records, each 64-bit
 * aligned, assembled in a single reusable scratch buffer.
 */
static int
unp_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error = 0;
	int i, n;
	struct unpcb *unp;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;
	void *buf __single = NULL;
	/* one entry = all five records, each rounded to 8-byte alignment */
	size_t item_size = ROUNDUP64(sizeof(struct xunpcb_n)) +
	    ROUNDUP64(sizeof(struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
	    ROUNDUP64(sizeof(struct xsockstat_n));

	/* allocate before taking unp_list_mtx; Z_NOFAIL cannot return NULL */
	buf = kalloc_data(item_size, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_rw_lock_shared(&unp_list_mtx);

	/* arg1 selects the datagram vs. stream pcb list */
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* size probe: report an over-estimate (n + n/8 entries) */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) * item_size;
		goto done;
	}

	if (req->newptr != USER_ADDR_NULL) {
		/* this sysctl is read-only */
		error = EPERM;
		goto done;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error != 0) {
		goto done;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	/* walk the list directly (no snapshot array, unlike pcblist/pcblist64) */
	for (i = 0, unp = head->lh_first;
	    i < n && unp != NULL;
	    i++, unp = unp->unp_link.le_next) {
		/* carve the five sub-records out of the scratch buffer */
		struct xunpcb_n *xu = (struct xunpcb_n *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xu, sizeof(*xu));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof(*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof(*xsbsnd));

		/* skip pcbs created after the generation we reported */
		if (unp->unp_gencnt > gencnt) {
			continue;
		}

		bzero(buf, item_size);

		xu->xunp_len = sizeof(struct xunpcb_n);
		xu->xunp_kind = XSO_UNPCB;
		/* all kernel pointers are scrambled before export */
		xu->xunp_unpp = (uint64_t)VM_KERNEL_ADDRPERM(unp);
		xu->xunp_vnode = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_vnode);
		xu->xunp_ino = unp->unp_ino;
		xu->xunp_conn = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_conn);
		xu->xunp_refs = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
		xu->xunp_reflink = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
		xu->xunp_cc = unp->unp_cc;
		xu->xunp_mbcnt = unp->unp_mbcnt;
		xu->xunp_flags = unp->unp_flags;
		xu->xunp_gencnt = unp->unp_gencnt;

		if (unp->unp_addr) {
			SOCKADDR_COPY(unp->unp_addr, &xu->xu_au);
		}
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			SOCKADDR_COPY(unp->unp_conn->unp_addr, &xu->xu_cau);
		}
		sotoxsocket_n(unp->unp_socket, xso);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(unp->unp_socket, xsostats);

		error = SYSCTL_OUT(req, buf, item_size);
		if (error != 0) {
			break;
		}
	}
	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
done:
	lck_rw_done(&unp_list_mtx);
	kfree_data(buf, item_size);
	return error;
}
2163
/* net.local.dgram.pcblist_n / net.local.stream.pcblist_n (xunpcb_n format) */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local stream sockets");
2172
2173 static void
unp_shutdown(struct unpcb * unp)2174 unp_shutdown(struct unpcb *unp)
2175 {
2176 struct socket *so = unp->unp_socket;
2177 struct socket *so2;
2178 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
2179 so2 = unp->unp_conn->unp_socket;
2180 unp_get_locks_in_order(so, so2);
2181 socantrcvmore(so2);
2182 socket_unlock(so2, 1);
2183 }
2184 }
2185
2186 static void
unp_drop(struct unpcb * unp,int errno)2187 unp_drop(struct unpcb *unp, int errno)
2188 {
2189 struct socket *so = unp->unp_socket;
2190
2191 so->so_error = (u_short)errno;
2192 unp_disconnect(unp);
2193 }
2194
2195 /*
2196 * fg_insertuipc_mark
2197 *
2198 * Description: Mark fileglob for insertion onto message queue if needed
2199 * Also takes fileglob reference
2200 *
2201 * Parameters: fg Fileglob pointer to insert
2202 *
2203 * Returns: true, if the fileglob needs to be inserted onto msg queue
2204 *
2205 * Locks: Takes and drops fg_lock, potentially many times
2206 */
2207 static boolean_t
fg_insertuipc_mark(struct fileglob * fg)2208 fg_insertuipc_mark(struct fileglob * fg)
2209 {
2210 boolean_t insert = FALSE;
2211
2212 lck_mtx_lock_spin(&fg->fg_lock);
2213 while (fg->fg_lflags & FG_RMMSGQ) {
2214 lck_mtx_convert_spin(&fg->fg_lock);
2215
2216 fg->fg_lflags |= FG_WRMMSGQ;
2217 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
2218 }
2219
2220 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
2221 fg->fg_msgcount++;
2222 if (fg->fg_msgcount == 1) {
2223 fg->fg_lflags |= FG_INSMSGQ;
2224 insert = TRUE;
2225 }
2226 lck_mtx_unlock(&fg->fg_lock);
2227 return insert;
2228 }
2229
2230 /*
2231 * fg_insertuipc
2232 *
2233 * Description: Insert marked fileglob onto message queue
2234 *
2235 * Parameters: fg Fileglob pointer to insert
2236 *
2237 * Returns: void
2238 *
2239 * Locks: Takes and drops fg_lock & uipc_lock
2240 * DO NOT call this function with proc_fdlock held as unp_gc()
2241 * can potentially try to acquire proc_fdlock, which can result
2242 * in a deadlock.
2243 */
2244 static void
fg_insertuipc(struct fileglob * fg)2245 fg_insertuipc(struct fileglob * fg)
2246 {
2247 if (fg->fg_lflags & FG_INSMSGQ) {
2248 lck_mtx_lock(&uipc_lock);
2249 LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
2250 lck_mtx_unlock(&uipc_lock);
2251 lck_mtx_lock(&fg->fg_lock);
2252 fg->fg_lflags &= ~FG_INSMSGQ;
2253 if (fg->fg_lflags & FG_WINSMSGQ) {
2254 fg->fg_lflags &= ~FG_WINSMSGQ;
2255 wakeup(&fg->fg_lflags);
2256 }
2257 lck_mtx_unlock(&fg->fg_lock);
2258 }
2259 }
2260
2261 /*
2262 * fg_removeuipc_mark
2263 *
2264 * Description: Mark the fileglob for removal from message queue if needed
2265 * Also releases fileglob message queue reference
2266 *
2267 * Parameters: fg Fileglob pointer to remove
2268 *
2269 * Returns: true, if the fileglob needs to be removed from msg queue
2270 *
2271 * Locks: Takes and drops fg_lock, potentially many times
2272 */
2273 static boolean_t
fg_removeuipc_mark(struct fileglob * fg)2274 fg_removeuipc_mark(struct fileglob * fg)
2275 {
2276 boolean_t remove = FALSE;
2277
2278 lck_mtx_lock_spin(&fg->fg_lock);
2279 while (fg->fg_lflags & FG_INSMSGQ) {
2280 lck_mtx_convert_spin(&fg->fg_lock);
2281
2282 fg->fg_lflags |= FG_WINSMSGQ;
2283 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
2284 }
2285 fg->fg_msgcount--;
2286 if (fg->fg_msgcount == 0) {
2287 fg->fg_lflags |= FG_RMMSGQ;
2288 remove = TRUE;
2289 }
2290 lck_mtx_unlock(&fg->fg_lock);
2291 return remove;
2292 }
2293
2294 /*
2295 * fg_removeuipc
2296 *
2297 * Description: Remove marked fileglob from message queue
2298 *
2299 * Parameters: fg Fileglob pointer to remove
2300 *
2301 * Returns: void
2302 *
2303 * Locks: Takes and drops fg_lock & uipc_lock
2304 * DO NOT call this function with proc_fdlock held as unp_gc()
2305 * can potentially try to acquire proc_fdlock, which can result
2306 * in a deadlock.
2307 */
2308 static void
fg_removeuipc(struct fileglob * fg)2309 fg_removeuipc(struct fileglob * fg)
2310 {
2311 if (fg->fg_lflags & FG_RMMSGQ) {
2312 lck_mtx_lock(&uipc_lock);
2313 LIST_REMOVE(fg, f_msglist);
2314 lck_mtx_unlock(&uipc_lock);
2315 lck_mtx_lock(&fg->fg_lock);
2316 fg->fg_lflags &= ~FG_RMMSGQ;
2317 if (fg->fg_lflags & FG_WRMMSGQ) {
2318 fg->fg_lflags &= ~FG_WRMMSGQ;
2319 wakeup(&fg->fg_lflags);
2320 }
2321 lck_mtx_unlock(&fg->fg_lock);
2322 }
2323 }
2324
2325 /*
2326 * Returns: 0 Success
2327 * EMSGSIZE The new fd's will not fit
2328 * ENOBUFS Cannot alloc struct fileproc
2329 */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	/* The payload currently holds fileglob pointers (kernel form). */
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	/* Number of descriptors encoded in the SCM_RIGHTS payload. */
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds __bidi_indexable;
	int error = 0;

	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 * Allocate all the fds, and if it doesn't fit,
	 * then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Roll back the fds allocated so far. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 * At this point we are commited, and can't fail anymore.
	 * Allocate all the fileprocs, and remove the files
	 * from the queue.
	 *
	 * Until we call procfdtbl_releasefd(), fds are in flux
	 * and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/* Drop the in-transit (message queue) accounting. */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 * Return the fds into `cm`.
	 * Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Zero the tail so no kernel pointers leak to user space. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* On failure, drop every in-transit fileglob reference. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2420
2421 void
unp_init(void)2422 unp_init(void)
2423 {
2424 _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2425 LIST_INIT(&unp_dhead);
2426 LIST_INIT(&unp_shead);
2427 }
2428
#ifndef MIN
/* Fallback definition; note it evaluates its arguments more than once. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2432
2433 /*
2434 * Returns: 0 Success
2435 * EINVAL
2436 * EBADF
2437 */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* One bit per fd: set when the fileglob must be queued on unp_msghead.
	 * Sized by UIPC_MAX_CMSG_FD, which unp_init() asserts covers any
	 * MCLBYTES-sized control message. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/* First pass: validate every fd before mutating anything, so a bad
	 * fd leaves the message untouched. */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/* Queue marked fileglobs only after dropping proc_fdlock; see the
	 * deadlock note on fg_insertuipc(). */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2499
/*
 * Garbage-collect fileglobs that are reachable only through SCM_RIGHTS
 * messages in transit (cycles of sockets passed over themselves).
 * Classic mark-and-sweep over the global unp_msghead list; restarts
 * from scratch whenever a lock cannot be taken without risking deadlock.
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	/* Mark phase: iterate until no new fileglob was deferred. */
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer' in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			socket_unlock(so, 0);
			/* unp_mark() bumps unp_defer for newly-reachable fds,
			 * keeping the do/while loop running another pass. */
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor. Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow. After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference. This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	/* Sweep phase: collect every still-unmarked, message-only fileglob. */
	nunref = 0;
	fpp = extra_ref;
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				socket_lock(so, 0);
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Drop the extra references taken above; last drop tears down. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(PROC_NULL, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2698
2699 void
unp_dispose(struct mbuf * m)2700 unp_dispose(struct mbuf *m)
2701 {
2702 if (m) {
2703 unp_scan(m, unp_discard, NULL);
2704 }
2705 }
2706
2707 /*
2708 * Returns: 0 Success
2709 */
2710 static int
unp_listen(struct unpcb * unp,proc_t p)2711 unp_listen(struct unpcb *unp, proc_t p)
2712 {
2713 kauth_cred_t safecred __single = kauth_cred_proc_ref(p);
2714 cru2x(safecred, &unp->unp_peercred);
2715 kauth_cred_unref(&safecred);
2716 unp->unp_flags |= UNP_HAVEPCCACHED;
2717 return 0;
2718 }
2719
2720 static void
unp_scan(struct mbuf * m0,void (* op)(struct fileglob *,void * arg),void * arg)2721 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2722 {
2723 struct mbuf *m;
2724 struct fileglob **rp;
2725 struct cmsghdr *cm;
2726 int i;
2727 int qfds;
2728
2729 while (m0) {
2730 for (m = m0; m; m = m->m_next) {
2731 if (m->m_type == MT_CONTROL &&
2732 (size_t)m->m_len >= sizeof(*cm)) {
2733 cm = mtod(m, struct cmsghdr *);
2734 if (cm->cmsg_level != SOL_SOCKET ||
2735 cm->cmsg_type != SCM_RIGHTS) {
2736 continue;
2737 }
2738 qfds = (cm->cmsg_len - sizeof(*cm)) /
2739 sizeof(int);
2740 rp = (struct fileglob **)(cm + 1);
2741 for (i = 0; i < qfds; i++) {
2742 (*op)(*rp++, arg);
2743 }
2744 break; /* XXX, but saves time */
2745 }
2746 }
2747 m0 = m0->m_act;
2748 }
2749 }
2750
/*
 * unp_scan() callback for unp_gc(): flag a fileglob as reachable.
 * Sets FMARK (reachable) and FDEFER (must be re-examined next pass)
 * unless it is already marked.
 */
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	/* Atomically set FMARK|FDEFER; bail out if FMARK already set. */
	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
			os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	/* Non-atomic increment: serialized by uipc_lock, which unp_gc()
	 * holds across the unp_scan() call that invokes this callback. */
	unp_defer++;
}
2765
2766 static void
unp_discard(struct fileglob * fg,void * p)2767 unp_discard(struct fileglob *fg, void *p)
2768 {
2769 if (p == NULL) {
2770 p = current_proc(); /* XXX */
2771 }
2772 (void) OSAddAtomic(1, &unp_disposed);
2773 if (fg_removeuipc_mark(fg)) {
2774 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2775 fg_removeuipc(fg);
2776 }
2777 (void) OSAddAtomic(-1, &unp_rights);
2778
2779 (void) fg_drop(p, fg);
2780 }
2781
2782 int
unp_lock(struct socket * so,int refcount,void * lr)2783 unp_lock(struct socket *so, int refcount, void * lr)
2784 {
2785 void * lr_saved __single;
2786 if (lr == 0) {
2787 lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
2788 } else {
2789 lr_saved = lr;
2790 }
2791
2792 if (so->so_pcb) {
2793 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2794 } else {
2795 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
2796 so, lr_saved, so->so_usecount);
2797 }
2798
2799 if (so->so_usecount < 0) {
2800 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
2801 so, so->so_pcb, lr_saved, so->so_usecount);
2802 }
2803
2804 if (refcount) {
2805 VERIFY(so->so_usecount > 0);
2806 so->so_usecount++;
2807 }
2808 so->lock_lr[so->next_lock_lr] = lr_saved;
2809 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2810 return 0;
2811 }
2812
/*
 * pr_unlock handler: drop the per-pcb mutex, optionally releasing a
 * use-count reference; frees the socket and pcb on the last reference
 * once SOF_PCBCLEARING is set.
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved __single;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp __single = sotounpcb(so);

	if (lr == 0) {
		lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* Remember who unlocked us, for debugging lock imbalances. */
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	/* Last reference on a dying pcb: tear everything down. */
	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		/* A socket went away: in-flight fds may now be unreachable. */
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2860
2861 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2862 unp_getlock(struct socket *so, __unused int flags)
2863 {
2864 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2865
2866
2867 if (so->so_pcb) {
2868 if (so->so_usecount < 0) {
2869 panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2870 }
2871 return &unp->unp_mtx;
2872 } else {
2873 panic("unp_getlock: so=%p NULL so_pcb", so);
2874 return so->so_proto->pr_domain->dom_mtx;
2875 }
2876 }
2877