1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97
98 #if CONFIG_MACF
99 #include <security/mac_framework.h>
100 #endif /* CONFIG_MACF */
101
102 #include <mach/vm_param.h>
103
104 #ifndef ROUNDUP64
105 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
106 #endif
107
108 #ifndef ADVANCE64
109 #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
110 #endif
111
112 /*
113 * Maximum number of FDs that can be passed in an mbuf
114 */
115 #define UIPC_MAX_CMSG_FD 512
116
117 ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
118 static unp_gen_t unp_gencnt;
119 static u_int unp_count;
120
121 static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
122 static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
123 static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
124
125 static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
126 static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
127 static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
128
129 static u_int disconnect_in_progress;
130
131 static struct unp_head unp_shead, unp_dhead;
132 static int unp_defer;
133 static thread_call_t unp_gc_tcall;
134 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
135
136 SYSCTL_DECL(_net_local);
137
138 static int unp_rights; /* file descriptors in flight */
139 static int unp_disposed; /* discarded file descriptors */
140
141 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
142
143 #define ULEF_CONNECTION 0x01
144 uint32_t unp_log_enable_flags = 0;
145
146 SYSCTL_UINT(_net_local, OID_AUTO, log, CTLFLAG_RD | CTLFLAG_LOCKED,
147 &unp_log_enable_flags, 0, "");
148
149
150 /*
151 * mDNSResponder tracing. When enabled, endpoints connected to
152 * /var/run/mDNSResponder will be traced; during each send on
153 * the traced socket, we log the PID and process name of the
154 * sending process. We also print out a bit of info related
155 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
156 * of mDNSResponder stays the same.
157 */
158 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
159
160 static int unpst_tracemdns; /* enable tracing */
161
162 #define MDNS_IPC_MSG_HDR_VERSION_1 1
163
/*
 * Local mirror of the leading fields of mDNSResponder's ipc_msg_hdr
 * (dnssd_ipc.h); used only when tracing sends to MDNSRESPONDER_PATH.
 * The layout assumes dnssd_ipc.h does not change (see comment above).
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;       /* expected MDNS_IPC_MSG_HDR_VERSION_1 (wire order) */
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;            /* operation code, logged in uipc_send() */
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
175
176 /*
177 * Unix communications domain.
178 *
179 * TODO:
180 * SEQPACKET, RDM
181 * rethink name space problems
182 * need a proper out-of-band
183 * lock pushdown
184 */
185 static struct sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
186 static ino_t unp_ino; /* prototype for fake inode numbers */
187
188 static int unp_attach(struct socket *);
189 static void unp_detach(struct unpcb *);
190 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
191 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
192 static void unp_disconnect(struct unpcb *);
193 static void unp_shutdown(struct unpcb *);
194 static void unp_drop(struct unpcb *, int);
195 static void unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
196 static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
197 static void unp_mark(struct fileglob *, __unused void *);
198 static void unp_discard(struct fileglob *, void *);
199 static int unp_internalize(struct mbuf *, proc_t);
200 static int unp_listen(struct unpcb *, proc_t);
201 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
202 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
203
__startup_func
static void
unp_gc_setup(void)
{
	/*
	 * Pre-allocate the garbage-collection thread call once at boot so
	 * unp_gc() can be scheduled later without allocating.
	 * THREAD_CALL_OPTIONS_ONCE coalesces repeated enqueue requests.
	 */
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
213
/*
 * Acquire the socket locks of a connected pair in canonical order
 * (lowest pointer first) to avoid lock-order deadlocks.
 *
 * The caller holds the lock on 'so'.  If 'conn_so' sorts after 'so',
 * it can simply be taken.  Otherwise 'so' must be dropped first; while
 * it is unlocked, the UNP_DONTDISCONNECT flag together with the
 * rw_thrcount reference keeps the pcb from being disconnected
 * underneath us.  Returns with both socket locks held.
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			/* Last thread out: allow disconnect again and wake
			 * any thread waiting for the flag to clear. */
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
235
236 static int
uipc_abort(struct socket * so)237 uipc_abort(struct socket *so)
238 {
239 struct unpcb *unp = sotounpcb(so);
240
241 if (unp == 0) {
242 return EINVAL;
243 }
244 unp_drop(unp, ECONNABORTED);
245 unp_detach(unp);
246 sofree(so);
247 return 0;
248 }
249
250 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)251 uipc_accept(struct socket *so, struct sockaddr **nam)
252 {
253 struct unpcb *unp = sotounpcb(so);
254
255 if (unp == 0) {
256 return EINVAL;
257 }
258
259 /*
260 * Pass back name of connected socket,
261 * if it was bound and we are still connected
262 * (our peer may have closed already!).
263 */
264 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
265 *nam = dup_sockaddr((struct sockaddr *)
266 unp->unp_conn->unp_addr, 1);
267 } else {
268 if (unp_log_enable_flags & ULEF_CONNECTION) {
269 os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
270 __func__, unp->unp_gencnt);
271 }
272 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
273 }
274 return 0;
275 }
276
277 /*
278 * Returns: 0 Success
279 * EISCONN
280 * unp_attach:
281 */
282 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)283 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
284 {
285 struct unpcb *unp = sotounpcb(so);
286
287 if (unp != 0) {
288 return EISCONN;
289 }
290 return unp_attach(so);
291 }
292
293 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)294 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
295 {
296 struct unpcb *unp = sotounpcb(so);
297
298 if (unp == 0) {
299 return EINVAL;
300 }
301
302 return unp_bind(unp, nam, p);
303 }
304
305 /*
306 * Returns: 0 Success
307 * EINVAL
308 * unp_connect:??? [See elsewhere in this file]
309 */
310 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)311 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
312 {
313 struct unpcb *unp = sotounpcb(so);
314
315 if (unp == 0) {
316 return EINVAL;
317 }
318 return unp_connect(so, nam, p);
319 }
320
321 /*
322 * Returns: 0 Success
323 * EINVAL
324 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
325 * unp_connect2:EINVAL Invalid argument
326 */
327 static int
uipc_connect2(struct socket * so1,struct socket * so2)328 uipc_connect2(struct socket *so1, struct socket *so2)
329 {
330 struct unpcb *unp = sotounpcb(so1);
331
332 if (unp == 0) {
333 return EINVAL;
334 }
335
336 return unp_connect2(so1, so2);
337 }
338
339 /* control is EOPNOTSUPP */
340
341 static int
uipc_detach(struct socket * so)342 uipc_detach(struct socket *so)
343 {
344 struct unpcb *unp = sotounpcb(so);
345
346 if (unp == 0) {
347 return EINVAL;
348 }
349
350 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
351 unp_detach(unp);
352 return 0;
353 }
354
355 static int
uipc_disconnect(struct socket * so)356 uipc_disconnect(struct socket *so)
357 {
358 struct unpcb *unp = sotounpcb(so);
359
360 if (unp == 0) {
361 return EINVAL;
362 }
363 unp_disconnect(unp);
364 return 0;
365 }
366
367 /*
368 * Returns: 0 Success
369 * EINVAL
370 */
371 static int
uipc_listen(struct socket * so,__unused proc_t p)372 uipc_listen(struct socket *so, __unused proc_t p)
373 {
374 struct unpcb *unp = sotounpcb(so);
375
376 if (unp == 0 || unp->unp_vnode == 0) {
377 return EINVAL;
378 }
379 return unp_listen(unp, p);
380 }
381
/*
 * pru_peeraddr: return the bound name of the connected peer, or the
 * unnamed sockaddr if the peer is gone or never bound.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
	/* Snapshot the peer socket so both locks can be taken in order. */
	so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
	if (so2 != NULL) {
		unp_get_locks_in_order(so, so2);
	}

	/*
	 * Re-check unp_conn: our lock may have been dropped inside
	 * unp_get_locks_in_order(), during which the peer could have
	 * disconnected.
	 */
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr((struct sockaddr *)
		    unp->unp_conn->unp_addr, 1);
	} else {
		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
	}
	if (so2 != NULL) {
		socket_unlock(so2, 1);
	}
	return 0;
}
407
/*
 * pru_rcvd: called after the receiver consumed data.  For stream
 * sockets, migrate the freed receive-buffer accounting back onto the
 * peer's send buffer (raising sb_mbmax/sb_hiwat to relieve
 * backpressure) and wake any writer blocked on the peer.  Datagram
 * sockets never advertise this capability, hence the panic.
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		return EINVAL;
	}
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
	/*NOTREACHED*/

	case SOCK_STREAM:
		/* Shorthands: 'rcv' is our receive buffer, 'snd' the peer's
		 * send buffer (valid only between the #define/#undef pairs). */
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
454
455 /* pru_rcvoob is EOPNOTSUPP */
456
457 /*
458 * Returns: 0 Success
459 * EINVAL
460 * EOPNOTSUPP
461 * EPIPE
462 * ENOTCONN
463 * EISCONN
464 * unp_internalize:EINVAL
465 * unp_internalize:EBADF
466 * unp_connect:EAFNOSUPPORT Address family not supported
467 * unp_connect:EINVAL Invalid argument
468 * unp_connect:ENOTSOCK Not a socket
469 * unp_connect:ECONNREFUSED Connection refused
470 * unp_connect:EISCONN Socket is connected
471 * unp_connect:EPROTOTYPE Protocol wrong type for socket
472 * unp_connect:???
473 * sbappendaddr:ENOBUFS [5th argument, contents modified]
474 * sbappendaddr:??? [whatever a filter author chooses]
475 */
/*
 * pru_send: deliver an mbuf chain (and optional control message
 * carrying internalized file descriptors) directly into the peer's
 * receive buffer.  Datagram sends may implicitly connect/disconnect;
 * stream sends adjust the sender's hiwat for flow control.  See the
 * Returns: block above for the error contract.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int32_t len = m_pktlen(m);

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		/* Unix-domain sockets have no out-of-band data. */
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* sendto() with an address: temporary connect. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/* Guard against self-connected sockets: never lock twice. */
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = (struct sockaddr *)unp->unp_addr;
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/* Ownership of 'm' passed to (or freed by) sbappendaddr(). */
		m = NULL;
		if (nam) {
			/* Undo the temporary sendto() connect. */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
		/* Shorthands: 'rcv' is the peer's receive buffer, 'snd' our
		 * send buffer (valid only between the #define/#undef pairs). */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break;  /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		if (unp->unp_flags & UNP_TRACE_MDNS) {
			/* mDNSResponder tracing (see comment near
			 * MDNSRESPONDER_PATH above). */
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure. Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full. It is
		 * possible, however, that append might fail. In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		/* Migrate buffer accounting onto our send buffer; clamp
		 * hiwat at 0 rather than letting it underflow. */
		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	so->so_tc_stats[0].txpackets += 1;
	so->so_tc_stats[0].txbytes += len;

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0) {
		/* On error, undo unp_internalize(): release the in-flight
		 * file descriptor references before freeing the mbuf. */
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
694
/*
 * pru_sense: fill in stat/stat64 for fstat(2) on a local socket.
 * Reports the send-buffer high-water mark (plus, for a connected
 * stream, the peer's unread byte count) as st_blksize, NODEV as
 * st_dev, and a lazily assigned fake inode number.
 */
static int
uipc_sense(struct socket *so, void *ub, int isstat64)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	blksize_t blksize;

	if (unp == 0) {
		return EINVAL;
	}

	blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
		so2 = unp->unp_conn->unp_socket;
		blksize += so2->so_rcv.sb_cc;
	}
	/* Lazily assign a fake inode from the global counter.
	 * NOTE(review): unp_ino starts at 0, so the first assignment
	 * stores 0 and will be re-done on the next call; also the
	 * increment has no visible synchronization here — confirm the
	 * caller holds a lock that serializes this. */
	if (unp->unp_ino == 0) {
		unp->unp_ino = unp_ino++;
	}

	if (isstat64 != 0) {
		struct stat64 *sb64;

		sb64 = (struct stat64 *)ub;
		sb64->st_blksize = blksize;
		sb64->st_dev = NODEV;
		sb64->st_ino = (ino64_t)unp->unp_ino;
	} else {
		struct stat *sb;

		sb = (struct stat *)ub;
		sb->st_blksize = blksize;
		sb->st_dev = NODEV;
		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
	}

	return 0;
}
733
734 /*
735 * Returns: 0 Success
736 * EINVAL
737 *
738 * Notes: This is not strictly correct, as unp_shutdown() also calls
739 * socantrcvmore(). These should maybe both be conditionalized
740 * on the 'how' argument in soshutdown() as called from the
741 * shutdown() system call.
742 */
743 static int
uipc_shutdown(struct socket * so)744 uipc_shutdown(struct socket *so)
745 {
746 struct unpcb *unp = sotounpcb(so);
747
748 if (unp == 0) {
749 return EINVAL;
750 }
751 socantsendmore(so);
752 unp_shutdown(unp);
753 return 0;
754 }
755
756 /*
757 * Returns: 0 Success
758 * EINVAL Invalid argument
759 */
760 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)761 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
762 {
763 struct unpcb *unp = sotounpcb(so);
764
765 if (unp == NULL) {
766 return EINVAL;
767 }
768 if (unp->unp_addr != NULL) {
769 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
770 } else {
771 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
772 }
773 return 0;
774 }
775
/*
 * Protocol user-request switch for the local (Unix) domain.
 * pru_control, pru_rcvoob and the other absent entries are
 * unsupported (EOPNOTSUPP); send/receive go through the generic
 * sosend()/soreceive() paths.
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = uipc_abort,
	.pru_accept = uipc_accept,
	.pru_attach = uipc_attach,
	.pru_bind = uipc_bind,
	.pru_connect = uipc_connect,
	.pru_connect2 = uipc_connect2,
	.pru_detach = uipc_detach,
	.pru_disconnect = uipc_disconnect,
	.pru_listen = uipc_listen,
	.pru_peeraddr = uipc_peeraddr,
	.pru_rcvd = uipc_rcvd,
	.pru_send = uipc_send,
	.pru_sense = uipc_sense,
	.pru_shutdown = uipc_shutdown,
	.pru_sockaddr = uipc_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive,
};
795
/*
 * Socket-option handler for AF_LOCAL sockets.  Only SOPT_GET of the
 * LOCAL_PEER* options is supported, reporting the identity of the
 * connected peer (cached credentials, pid/epid, uuid/euuid, or audit
 * token); all sets and unknown gets return EOPNOTSUPP.
 *
 * NOTE(review): unlike the pru_* handlers above, 'unp' is dereferenced
 * without a NULL check (e.g. in LOCAL_PEERCRED) — presumably callers
 * only reach ctloutput with a valid pcb; confirm.
 */
int
uipc_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct unpcb *unp = sotounpcb(so);
	int error = 0;
	pid_t peerpid;
	proc_t p;
	task_t t;
	struct socket *peerso;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			/* Credentials are cached on the pcb when UNP_HAVEPC
			 * is set. */
			if (unp->unp_flags & UNP_HAVEPC) {
				error = sooptcopyout(sopt, &unp->unp_peercred,
				    sizeof(unp->unp_peercred));
			} else {
				if (so->so_type == SOCK_STREAM) {
					error = ENOTCONN;
				} else {
					error = EINVAL;
				}
			}
			break;
		case LOCAL_PEERPID:
		case LOCAL_PEEREPID:
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			peerso = unp->unp_conn->unp_socket;
			if (peerso == NULL) {
				panic("peer is connected but has no socket?");
			}
			/* Both socket locks are needed to read peer fields. */
			unp_get_locks_in_order(so, peerso);
			/* PEEREPID reports the effective (delegated) pid when
			 * the peer socket is delegated. */
			if (sopt->sopt_name == LOCAL_PEEREPID &&
			    peerso->so_flags & SOF_DELEGATED) {
				peerpid = peerso->e_pid;
			} else {
				peerpid = peerso->last_pid;
			}
			socket_unlock(peerso, 1);
			error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
			break;
		case LOCAL_PEERUUID:
		case LOCAL_PEEREUUID:
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			peerso = unp->unp_conn->unp_socket;
			if (peerso == NULL) {
				panic("peer is connected but has no socket?");
			}
			unp_get_locks_in_order(so, peerso);
			/* Same delegation rule as PEEREPID, for the uuid. */
			if (sopt->sopt_name == LOCAL_PEEREUUID &&
			    peerso->so_flags & SOF_DELEGATED) {
				error = sooptcopyout(sopt, &peerso->e_uuid,
				    sizeof(peerso->e_uuid));
			} else {
				error = sooptcopyout(sopt, &peerso->last_uuid,
				    sizeof(peerso->last_uuid));
			}
			socket_unlock(peerso, 1);
			break;
		case LOCAL_PEERTOKEN:
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			peerso = unp->unp_conn->unp_socket;
			if (peerso == NULL) {
				panic("peer is connected but has no socket?");
			}
			unp_get_locks_in_order(so, peerso);
			/* Look up the peer's task to fetch its audit token;
			 * any lookup failure maps to EINVAL. */
			peerpid = peerso->last_pid;
			p = proc_find(peerpid);
			if (p != PROC_NULL) {
				t = proc_task(p);
				if (t != TASK_NULL) {
					audit_token_t peertoken;
					mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
					if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
						error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
					} else {
						error = EINVAL;
					}
				} else {
					error = EINVAL;
				}
				proc_rele(p);
			} else {
				error = EINVAL;
			}
			socket_unlock(peerso, 1);
			break;
		default:
			error = EOPNOTSUPP;
			break;
		}
		break;
	case SOPT_SET:
	default:
		error = EOPNOTSUPP;
		break;
	}

	return error;
}
906
907 /*
908 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
909 * for stream sockets, although the total for sender and receiver is
910 * actually only PIPSIZ.
911 * Datagram sockets really use the sendspace as the maximum datagram size,
912 * and don't really want to reserve the sendspace. Their recvspace should
913 * be large enough for at least one max-size datagram plus address.
914 */
915 #ifndef PIPSIZ
916 #define PIPSIZ 8192
917 #endif
918 static u_int32_t unpst_sendspace = PIPSIZ;
919 static u_int32_t unpst_recvspace = PIPSIZ;
920 static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
921 static u_int32_t unpdg_recvspace = 4 * 1024;
922
923 SYSCTL_DECL(_net_local_stream);
924 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
925 &unpst_sendspace, 0, "");
926 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
927 &unpst_recvspace, 0, "");
928 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
929 &unpst_tracemdns, 0, "");
930 SYSCTL_DECL(_net_local_dgram);
931 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
932 &unpdg_sendspace, 0, "");
933 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
934 &unpdg_recvspace, 0, "");
935
936 /*
937 * Returns: 0 Success
938 * ENOBUFS
939 * soreserve:ENOBUFS
940 */
/*
 * Allocate and initialize a unpcb for 'so', reserve default buffer
 * space if the caller has not, and link the pcb onto the global
 * stream or datagram list under the list rwlock.
 *
 * Returns: 0 on success, or soreserve()'s error (ENOBUFS).
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	/* Reserve default space only if not already reserved. */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			/*
			 * By default soreserve() will set the low water
			 * mark to MCLBYTES which is too high given our
			 * default sendspace. Override it here to something
			 * sensible.
			 */
			so->so_snd.sb_lowat = 1;
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	/* Z_NOFAIL: allocation cannot return NULL. */
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Publish the pcb on the global list; bump the generation count
	 * so list snapshots (sysctl) can detect the change. */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 * the lack of space; this essentially loosens the sbspace() check,
	 * since there is disconnect between sosend() and uipc_send() with
	 * respect to flow control that might result in our dropping the
	 * data in uipc_send(). By setting this, we allow for slightly
	 * more records to be appended to the receiving socket to avoid
	 * losing data (which we can't afford in the SOCK_STREAM case).
	 * Flow control still takes place since we adjust the sender's
	 * hiwat during each send. This doesn't affect the SOCK_DGRAM
	 * case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 * file descriptors, the append routines will not free them since
	 * we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
1006
/*
 * Tear down a unpcb: unlink it from the global pcb list, break its
 * association with a bound vnode (if any), disconnect it from its
 * peer, and reset every datagram socket still connected to it.  On
 * return the socket is marked disconnected and SOF_PCBCLEARING so it
 * gets deallocated.  Called with the socket locked; the lock may be
 * dropped and retaken internally.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the vnode may have been cleared while the
		 * socket lock was dropped above. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp); /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	/* Drain the list of datagram sockets that are connected to us,
	 * dropping each with ECONNRESET. */
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	/* Reacquire our socket lock if the loop above dropped it. */
	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1086
1087 /*
1088 * Returns: 0 Success
1089 * EAFNOSUPPORT
1090 * EINVAL
1091 * EADDRINUSE
1092 * namei:??? [anything namei can return]
1093 * vnode_authorize:??? [anything vnode_authorize can return]
1094 *
1095 * Notes: p at this point is the current process, as this function is
1096 * only called by sobind().
1097 */
1098 static int
unp_bind(struct unpcb * unp,struct sockaddr * nam,proc_t p)1099 unp_bind(
1100 struct unpcb *unp,
1101 struct sockaddr *nam,
1102 proc_t p)
1103 {
1104 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1105 struct vnode *vp, *dvp;
1106 struct vnode_attr va;
1107 vfs_context_t ctx = vfs_context_current();
1108 int error, namelen;
1109 struct nameidata nd;
1110 struct socket *so = unp->unp_socket;
1111 char buf[SOCK_MAXADDRLEN];
1112
1113 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1114 return EAFNOSUPPORT;
1115 }
1116
1117 /*
1118 * Check if the socket is already bound to an address
1119 */
1120 if (unp->unp_vnode != NULL) {
1121 return EINVAL;
1122 }
1123 /*
1124 * Check if the socket may have been shut down
1125 */
1126 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1127 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1128 return EINVAL;
1129 }
1130
1131 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
1132 if (namelen <= 0) {
1133 return EINVAL;
1134 }
1135 /*
1136 * Note: sun_path is not a zero terminated "C" string
1137 */
1138 if (namelen >= SOCK_MAXADDRLEN) {
1139 return EINVAL;
1140 }
1141 bcopy(soun->sun_path, buf, namelen);
1142 buf[namelen] = 0;
1143
1144 socket_unlock(so, 0);
1145
1146 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1147 CAST_USER_ADDR_T(buf), ctx);
1148 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1149 error = namei(&nd);
1150 if (error) {
1151 socket_lock(so, 0);
1152 return error;
1153 }
1154 dvp = nd.ni_dvp;
1155 vp = nd.ni_vp;
1156
1157 if (vp != NULL) {
1158 /*
1159 * need to do this before the vnode_put of dvp
1160 * since we may have to release an fs_nodelock
1161 */
1162 nameidone(&nd);
1163
1164 vnode_put(dvp);
1165 vnode_put(vp);
1166
1167 socket_lock(so, 0);
1168 return EADDRINUSE;
1169 }
1170
1171 VATTR_INIT(&va);
1172 VATTR_SET(&va, va_type, VSOCK);
1173 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));
1174
1175 #if CONFIG_MACF
1176 error = mac_vnode_check_create(ctx,
1177 nd.ni_dvp, &nd.ni_cnd, &va);
1178
1179 if (error == 0)
1180 #endif /* CONFIG_MACF */
1181 #if CONFIG_MACF_SOCKET_SUBSET
1182 error = mac_vnode_check_uipc_bind(ctx,
1183 nd.ni_dvp, &nd.ni_cnd, &va);
1184
1185 if (error == 0)
1186 #endif /* MAC_SOCKET_SUBSET */
1187 /* authorize before creating */
1188 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1189
1190 if (!error) {
1191 /* create the socket */
1192 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1193 }
1194
1195 nameidone(&nd);
1196 vnode_put(dvp);
1197
1198 if (error) {
1199 socket_lock(so, 0);
1200 return error;
1201 }
1202
1203 socket_lock(so, 0);
1204
1205 if (unp->unp_vnode != NULL) {
1206 vnode_put(vp); /* drop the iocount */
1207 return EINVAL;
1208 }
1209
1210 error = vnode_ref(vp); /* gain a longterm reference */
1211 if (error) {
1212 vnode_put(vp); /* drop the iocount */
1213 return error;
1214 }
1215
1216 vp->v_socket = unp->unp_socket;
1217 unp->unp_vnode = vp;
1218 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1219 vnode_put(vp); /* drop the iocount */
1220
1221 return 0;
1222 }
1223
1224
1225 /*
1226 * Returns: 0 Success
1227 * EAFNOSUPPORT Address family not supported
1228 * EINVAL Invalid argument
1229 * ENOTSOCK Not a socket
1230 * ECONNREFUSED Connection refused
1231 * EPROTOTYPE Protocol wrong type for socket
1232 * EISCONN Socket is connected
1233 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1234 * unp_connect2:EINVAL Invalid argument
1235 * namei:??? [anything namei can return]
 * vnode_authorize:??? [anything vnode_authorize can return]
1237 *
1238 * Notes: p at this point is the current process, as this function is
1239 * only called by sosend(), sendfile(), and soconnectlock().
1240 */
1241 static int
unp_connect(struct socket * so,struct sockaddr * nam,__unused proc_t p)1242 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1243 {
1244 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1245 struct vnode *vp;
1246 struct socket *so2, *so3, *list_so = NULL;
1247 struct unpcb *unp, *unp2, *unp3;
1248 vfs_context_t ctx = vfs_context_current();
1249 int error, len;
1250 struct nameidata nd;
1251 char buf[SOCK_MAXADDRLEN];
1252
1253 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1254 return EAFNOSUPPORT;
1255 }
1256
1257 unp = sotounpcb(so);
1258 so2 = so3 = NULL;
1259
1260 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1261 if (len <= 0) {
1262 return EINVAL;
1263 }
1264 /*
1265 * Note: sun_path is not a zero terminated "C" string
1266 */
1267 if (len >= SOCK_MAXADDRLEN) {
1268 return EINVAL;
1269 }
1270
1271 soisconnecting(so);
1272
1273 bcopy(soun->sun_path, buf, len);
1274 buf[len] = 0;
1275
1276 socket_unlock(so, 0);
1277
1278 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1279 CAST_USER_ADDR_T(buf), ctx);
1280 error = namei(&nd);
1281 if (error) {
1282 socket_lock(so, 0);
1283 return error;
1284 }
1285 nameidone(&nd);
1286 vp = nd.ni_vp;
1287 if (vp->v_type != VSOCK) {
1288 error = ENOTSOCK;
1289 socket_lock(so, 0);
1290 goto out;
1291 }
1292
1293 #if CONFIG_MACF_SOCKET_SUBSET
1294 error = mac_vnode_check_uipc_connect(ctx, vp, so);
1295 if (error) {
1296 socket_lock(so, 0);
1297 goto out;
1298 }
1299 #endif /* MAC_SOCKET_SUBSET */
1300
1301 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1302 if (error) {
1303 socket_lock(so, 0);
1304 goto out;
1305 }
1306
1307 lck_mtx_lock(&unp_connect_lock);
1308
1309 if (vp->v_socket == 0) {
1310 lck_mtx_unlock(&unp_connect_lock);
1311 error = ECONNREFUSED;
1312 socket_lock(so, 0);
1313 goto out;
1314 }
1315
1316 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1317 so2 = vp->v_socket;
1318 lck_mtx_unlock(&unp_connect_lock);
1319
1320
1321 if (so2->so_pcb == NULL) {
1322 error = ECONNREFUSED;
1323 if (so != so2) {
1324 socket_unlock(so2, 1);
1325 socket_lock(so, 0);
1326 } else {
1327 /* Release the reference held for the listen socket */
1328 VERIFY(so2->so_usecount > 0);
1329 so2->so_usecount--;
1330 }
1331 goto out;
1332 }
1333
1334 if (so < so2) {
1335 socket_unlock(so2, 0);
1336 socket_lock(so, 0);
1337 socket_lock(so2, 0);
1338 } else if (so > so2) {
1339 socket_lock(so, 0);
1340 }
1341 /*
1342 * Check if socket was connected while we were trying to
1343 * get the socket locks in order.
1344 * XXX - probably shouldn't return an error for SOCK_DGRAM
1345 */
1346 if ((so->so_state & SS_ISCONNECTED) != 0) {
1347 error = EISCONN;
1348 goto decref_out;
1349 }
1350
1351 if (so->so_type != so2->so_type) {
1352 error = EPROTOTYPE;
1353 goto decref_out;
1354 }
1355
1356 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1357 /* Release the incoming socket but keep a reference */
1358 socket_unlock(so, 0);
1359
1360 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1361 (so3 = sonewconn(so2, 0, nam)) == 0) {
1362 error = ECONNREFUSED;
1363 if (so != so2) {
1364 socket_unlock(so2, 1);
1365 socket_lock(so, 0);
1366 } else {
1367 socket_lock(so, 0);
1368 /* Release the reference held for
1369 * listen socket.
1370 */
1371 VERIFY(so2->so_usecount > 0);
1372 so2->so_usecount--;
1373 }
1374 goto out;
1375 }
1376 unp2 = sotounpcb(so2);
1377 unp3 = sotounpcb(so3);
1378 if (unp2->unp_addr) {
1379 unp3->unp_addr = (struct sockaddr_un *)
1380 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1381 }
1382
1383 /*
1384 * unp_peercred management:
1385 *
1386 * The connecter's (client's) credentials are copied
1387 * from its process structure at the time of connect()
1388 * (which is now).
1389 */
1390 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1391 unp3->unp_flags |= UNP_HAVEPC;
1392 /*
1393 * The receiver's (server's) credentials are copied
1394 * from the unp_peercred member of socket on which the
1395 * former called listen(); unp_listen() cached that
1396 * process's credentials at that time so we can use
1397 * them now.
1398 */
1399 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1400 ("unp_connect: listener without cached peercred"));
1401
1402 /* Here we need to have both so and so2 locks and so2
1403 * is already locked. Lock ordering is required.
1404 */
1405 if (so < so2) {
1406 socket_unlock(so2, 0);
1407 socket_lock(so, 0);
1408 socket_lock(so2, 0);
1409 } else {
1410 socket_lock(so, 0);
1411 }
1412
1413 /* Check again if the socket state changed when its lock was released */
1414 if ((so->so_state & SS_ISCONNECTED) != 0) {
1415 error = EISCONN;
1416 socket_unlock(so2, 1);
1417 socket_lock(so3, 0);
1418 sofreelastref(so3, 1);
1419 goto out;
1420 }
1421 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1422 sizeof(unp->unp_peercred));
1423 unp->unp_flags |= UNP_HAVEPC;
1424
1425 /* Hold the reference on listening socket until the end */
1426 socket_unlock(so2, 0);
1427 list_so = so2;
1428
1429 /* Lock ordering doesn't matter because so3 was just created */
1430 socket_lock(so3, 1);
1431 so2 = so3;
1432
1433 /*
1434 * Enable tracing for mDNSResponder endpoints. (The use
1435 * of sizeof instead of strlen below takes the null
1436 * terminating character into account.)
1437 */
1438 if (unpst_tracemdns &&
1439 !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1440 sizeof(MDNSRESPONDER_PATH))) {
1441 unp->unp_flags |= UNP_TRACE_MDNS;
1442 unp2->unp_flags |= UNP_TRACE_MDNS;
1443 }
1444 }
1445
1446 error = unp_connect2(so, so2);
1447
1448 decref_out:
1449 if (so2 != NULL) {
1450 if (so != so2) {
1451 socket_unlock(so2, 1);
1452 } else {
1453 /* Release the extra reference held for the listen socket.
1454 * This is possible only for SOCK_DGRAM sockets. We refuse
1455 * connecting to the same socket for SOCK_STREAM sockets.
1456 */
1457 VERIFY(so2->so_usecount > 0);
1458 so2->so_usecount--;
1459 }
1460 }
1461
1462 if (list_so != NULL) {
1463 socket_lock(list_so, 0);
1464 socket_unlock(list_so, 1);
1465 }
1466
1467 out:
1468 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1469 vnode_put(vp);
1470 return error;
1471 }
1472
1473 /*
1474 * Returns: 0 Success
1475 * EPROTOTYPE Protocol wrong type for socket
1476 * EINVAL Invalid argument
1477 */
1478 int
unp_connect2(struct socket * so,struct socket * so2)1479 unp_connect2(struct socket *so, struct socket *so2)
1480 {
1481 struct unpcb *unp = sotounpcb(so);
1482 struct unpcb *unp2;
1483
1484 if (so2->so_type != so->so_type) {
1485 return EPROTOTYPE;
1486 }
1487
1488 unp2 = sotounpcb(so2);
1489
1490 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1491 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1492
1493 /* Verify both sockets are still opened */
1494 if (unp == 0 || unp2 == 0) {
1495 return EINVAL;
1496 }
1497
1498 unp->unp_conn = unp2;
1499 so2->so_usecount++;
1500
1501 switch (so->so_type) {
1502 case SOCK_DGRAM:
1503 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1504
1505 if (so != so2) {
1506 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1507 /* Keep an extra reference on so2 that will be dropped
1508 * soon after getting the locks in order
1509 */
1510 socket_unlock(so2, 0);
1511 soisconnected(so);
1512 unp_get_locks_in_order(so, so2);
1513 VERIFY(so2->so_usecount > 0);
1514 so2->so_usecount--;
1515 } else {
1516 soisconnected(so);
1517 }
1518
1519 break;
1520
1521 case SOCK_STREAM:
1522 /* This takes care of socketpair */
1523 if (!(unp->unp_flags & UNP_HAVEPC) &&
1524 !(unp2->unp_flags & UNP_HAVEPC)) {
1525 cru2x(kauth_cred_get(), &unp->unp_peercred);
1526 unp->unp_flags |= UNP_HAVEPC;
1527
1528 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1529 unp2->unp_flags |= UNP_HAVEPC;
1530 }
1531 unp2->unp_conn = unp;
1532 so->so_usecount++;
1533
1534 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1535 socket_unlock(so, 0);
1536 soisconnected(so2);
1537
1538 /* Keep an extra reference on so2, that will be dropped soon after
1539 * getting the locks in order again.
1540 */
1541 socket_unlock(so2, 0);
1542
1543 socket_lock(so, 0);
1544 soisconnected(so);
1545
1546 unp_get_locks_in_order(so, so2);
1547 /* Decrement the extra reference left before */
1548 VERIFY(so2->so_usecount > 0);
1549 so2->so_usecount--;
1550 break;
1551
1552 default:
1553 panic("unknown socket type %d in unp_connect2", so->so_type);
1554 }
1555 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1556 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1557 return 0;
1558 }
1559
1560 static void
unp_disconnect(struct unpcb * unp)1561 unp_disconnect(struct unpcb *unp)
1562 {
1563 struct unpcb *unp2 = NULL;
1564 struct socket *so2 = NULL, *so;
1565 struct socket *waitso;
1566 int so_locked = 1, strdisconn = 0;
1567
1568 so = unp->unp_socket;
1569 if (unp->unp_conn == NULL) {
1570 return;
1571 }
1572 lck_mtx_lock(&unp_disconnect_lock);
1573 while (disconnect_in_progress != 0) {
1574 if (so_locked == 1) {
1575 socket_unlock(so, 0);
1576 so_locked = 0;
1577 }
1578 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
1579 PSOCK, "disconnect", NULL);
1580 }
1581 disconnect_in_progress = 1;
1582 lck_mtx_unlock(&unp_disconnect_lock);
1583
1584 if (so_locked == 0) {
1585 socket_lock(so, 0);
1586 so_locked = 1;
1587 }
1588
1589 unp2 = unp->unp_conn;
1590
1591 if (unp2 == 0 || unp2->unp_socket == NULL) {
1592 goto out;
1593 }
1594 so2 = unp2->unp_socket;
1595
1596 try_again:
1597 if (so == so2) {
1598 if (so_locked == 0) {
1599 socket_lock(so, 0);
1600 }
1601 waitso = so;
1602 } else if (so < so2) {
1603 if (so_locked == 0) {
1604 socket_lock(so, 0);
1605 }
1606 socket_lock(so2, 1);
1607 waitso = so2;
1608 } else {
1609 if (so_locked == 1) {
1610 socket_unlock(so, 0);
1611 }
1612 socket_lock(so2, 1);
1613 socket_lock(so, 0);
1614 waitso = so;
1615 }
1616 so_locked = 1;
1617
1618 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1619 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1620
1621 /* Check for the UNP_DONTDISCONNECT flag, if it
1622 * is set, release both sockets and go to sleep
1623 */
1624
1625 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1626 if (so != so2) {
1627 socket_unlock(so2, 1);
1628 }
1629 so_locked = 0;
1630
1631 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1632 PSOCK | PDROP, "unpdisconnect", NULL);
1633 goto try_again;
1634 }
1635
1636 if (unp->unp_conn == NULL) {
1637 panic("unp_conn became NULL after sleep");
1638 }
1639
1640 unp->unp_conn = NULL;
1641 VERIFY(so2->so_usecount > 0);
1642 so2->so_usecount--;
1643
1644 if (unp->unp_flags & UNP_TRACE_MDNS) {
1645 unp->unp_flags &= ~UNP_TRACE_MDNS;
1646 }
1647
1648 switch (unp->unp_socket->so_type) {
1649 case SOCK_DGRAM:
1650 LIST_REMOVE(unp, unp_reflink);
1651 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1652 if (so != so2) {
1653 socket_unlock(so2, 1);
1654 }
1655 break;
1656
1657 case SOCK_STREAM:
1658 unp2->unp_conn = NULL;
1659 VERIFY(so->so_usecount > 0);
1660 so->so_usecount--;
1661
1662 /*
1663 * Set the socket state correctly but do a wakeup later when
1664 * we release all locks except the socket lock, this will avoid
1665 * a deadlock.
1666 */
1667 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1668 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1669
1670 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1671 unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1672
1673 if (unp2->unp_flags & UNP_TRACE_MDNS) {
1674 unp2->unp_flags &= ~UNP_TRACE_MDNS;
1675 }
1676
1677 strdisconn = 1;
1678 break;
1679 default:
1680 panic("unknown socket type %d", so->so_type);
1681 }
1682 out:
1683 lck_mtx_lock(&unp_disconnect_lock);
1684 disconnect_in_progress = 0;
1685 wakeup(&disconnect_in_progress);
1686 lck_mtx_unlock(&unp_disconnect_lock);
1687
1688 if (strdisconn) {
1689 socket_unlock(so, 0);
1690 soisdisconnected(so2);
1691 socket_unlock(so2, 1);
1692
1693 socket_lock(so, 0);
1694 soisdisconnected(so);
1695 }
1696 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1697 return;
1698 }
1699
1700 /*
1701 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1702 * The unpcb_compat data structure is passed to user space and must not change.
1703 */
1704 static void
unpcb_to_compat(struct unpcb * up,struct unpcb_compat * cp)1705 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1706 {
1707 #if defined(__LP64__)
1708 cp->unp_link.le_next = (u_int32_t)
1709 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1710 cp->unp_link.le_prev = (u_int32_t)
1711 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1712 #else
1713 cp->unp_link.le_next = (struct unpcb_compat *)
1714 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1715 cp->unp_link.le_prev = (struct unpcb_compat **)
1716 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1717 #endif
1718 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1719 VM_KERNEL_ADDRPERM(up->unp_socket);
1720 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1721 VM_KERNEL_ADDRPERM(up->unp_vnode);
1722 cp->unp_ino = up->unp_ino;
1723 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1724 VM_KERNEL_ADDRPERM(up->unp_conn);
1725 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1726 #if defined(__LP64__)
1727 cp->unp_reflink.le_next =
1728 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1729 cp->unp_reflink.le_prev =
1730 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1731 #else
1732 cp->unp_reflink.le_next =
1733 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1734 cp->unp_reflink.le_prev =
1735 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1736 #endif
1737 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1738 VM_KERNEL_ADDRPERM(up->unp_addr);
1739 cp->unp_cc = up->unp_cc;
1740 cp->unp_mbcnt = up->unp_mbcnt;
1741 cp->unp_gencnt = up->unp_gencnt;
1742 }
1743
/*
 * sysctl handler: export the list of UNIX-domain PCBs (32-bit compat
 * layout) for the datagram or stream head, selected by arg1.
 * Read-only; a non-NULL newptr is rejected with EPERM.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report an estimate with ~12% slack for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	/* Leading header records the generation/count at snapshot time */
	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Collect only pcbs that existed at (or before) the snapshot */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRPERM(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1861
/* sysctl OIDs: net.local.dgram.pcblist and net.local.stream.pcblist */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1870
1871 #if XNU_TARGET_OS_OSX
1872
/*
 * sysctl handler: same as unp_pcblist but exports the 64-bit-safe
 * xunpcb64 layout.  macOS only (XNU_TARGET_OS_OSX).
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: estimate with ~12% slack for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	/* Leading header records the generation/count at snapshot time */
	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Collect only pcbs that existed at (or before) the snapshot */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			/* All kernel pointers are scrambled before export */
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
2014
/* sysctl OIDs: net.local.dgram.pcblist64 and net.local.stream.pcblist64 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
2023
2024 #endif /* XNU_TARGET_OS_OSX */
2025
/*
 * sysctl handler: export the PCB list in the extensible "_n" record
 * format (xunpcb_n + xsocket_n + two xsockbuf_n + xsockstat_n per pcb,
 * each sub-record 64-bit aligned).
 */
static int
unp_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error = 0;
	int i, n;
	struct unpcb *unp;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;
	void *buf = NULL;
	/* One scratch buffer holds all five 64-bit-aligned sub-records */
	size_t item_size = ROUNDUP64(sizeof(struct xunpcb_n)) +
	    ROUNDUP64(sizeof(struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
	    ROUNDUP64(sizeof(struct xsockstat_n));

	/* Allocate before taking the lock; Z_NOFAIL means no NULL check needed */
	buf = kalloc_data(item_size, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_rw_lock_shared(&unp_list_mtx);

	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: estimate with ~12% slack for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) * item_size;
		goto done;
	}

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	/* Leading header records the generation/count at snapshot time */
	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error != 0) {
		goto done;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, unp = head->lh_first;
	    i < n && unp != NULL;
	    i++, unp = unp->unp_link.le_next) {
		/* Carve the five sub-records out of the scratch buffer */
		struct xunpcb_n *xu = (struct xunpcb_n *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xu, sizeof(*xu));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof(*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof(*xsbsnd));

		/* Skip pcbs created after the snapshot */
		if (unp->unp_gencnt > gencnt) {
			continue;
		}

		bzero(buf, item_size);

		xu->xunp_len = sizeof(struct xunpcb_n);
		xu->xunp_kind = XSO_UNPCB;
		/* Kernel pointers are scrambled before export */
		xu->xunp_unpp = (uint64_t)VM_KERNEL_ADDRPERM(unp);
		xu->xunp_vnode = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_vnode);
		xu->xunp_ino = unp->unp_ino;
		xu->xunp_conn = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_conn);
		xu->xunp_refs = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
		xu->xunp_reflink = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
		xu->xunp_cc = unp->unp_cc;
		xu->xunp_mbcnt = unp->unp_mbcnt;
		xu->xunp_flags = unp->unp_flags;
		xu->xunp_gencnt = unp->unp_gencnt;

		if (unp->unp_addr) {
			bcopy(unp->unp_addr, &xu->xu_au,
			    unp->unp_addr->sun_len);
		}
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			bcopy(unp->unp_conn->unp_addr,
			    &xu->xu_cau,
			    unp->unp_conn->unp_addr->sun_len);
		}
		sotoxsocket_n(unp->unp_socket, xso);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(unp->unp_socket, xsostats);

		error = SYSCTL_OUT(req, buf, item_size);
		if (error != 0) {
			break;
		}
	}
	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
done:
	lck_rw_done(&unp_list_mtx);
	kfree_data(buf, item_size);
	return error;
}
2159
/* sysctl OIDs: net.local.dgram.pcblist_n and net.local.stream.pcblist_n */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local stream sockets");
2168
2169 static void
unp_shutdown(struct unpcb * unp)2170 unp_shutdown(struct unpcb *unp)
2171 {
2172 struct socket *so = unp->unp_socket;
2173 struct socket *so2;
2174 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
2175 so2 = unp->unp_conn->unp_socket;
2176 unp_get_locks_in_order(so, so2);
2177 socantrcvmore(so2);
2178 socket_unlock(so2, 1);
2179 }
2180 }
2181
2182 static void
unp_drop(struct unpcb * unp,int errno)2183 unp_drop(struct unpcb *unp, int errno)
2184 {
2185 struct socket *so = unp->unp_socket;
2186
2187 so->so_error = (u_short)errno;
2188 unp_disconnect(unp);
2189 }
2190
2191 /*
2192 * fg_insertuipc_mark
2193 *
2194 * Description: Mark fileglob for insertion onto message queue if needed
2195 * Also takes fileglob reference
2196 *
2197 * Parameters: fg Fileglob pointer to insert
2198 *
2199 * Returns: true, if the fileglob needs to be inserted onto msg queue
2200 *
2201 * Locks: Takes and drops fg_lock, potentially many times
2202 */
2203 static boolean_t
fg_insertuipc_mark(struct fileglob * fg)2204 fg_insertuipc_mark(struct fileglob * fg)
2205 {
2206 boolean_t insert = FALSE;
2207
2208 lck_mtx_lock_spin(&fg->fg_lock);
2209 while (fg->fg_lflags & FG_RMMSGQ) {
2210 lck_mtx_convert_spin(&fg->fg_lock);
2211
2212 fg->fg_lflags |= FG_WRMMSGQ;
2213 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
2214 }
2215
2216 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
2217 fg->fg_msgcount++;
2218 if (fg->fg_msgcount == 1) {
2219 fg->fg_lflags |= FG_INSMSGQ;
2220 insert = TRUE;
2221 }
2222 lck_mtx_unlock(&fg->fg_lock);
2223 return insert;
2224 }
2225
2226 /*
2227 * fg_insertuipc
2228 *
2229 * Description: Insert marked fileglob onto message queue
2230 *
2231 * Parameters: fg Fileglob pointer to insert
2232 *
2233 * Returns: void
2234 *
2235 * Locks: Takes and drops fg_lock & uipc_lock
2236 * DO NOT call this function with proc_fdlock held as unp_gc()
2237 * can potentially try to acquire proc_fdlock, which can result
2238 * in a deadlock.
2239 */
static void
fg_insertuipc(struct fileglob * fg)
{
	/* Only act if fg_insertuipc_mark() flagged this fileglob. */
	if (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_INSMSGQ;
		/* Wake any thread waiting in fg_removeuipc_mark(). */
		if (fg->fg_lflags & FG_WINSMSGQ) {
			fg->fg_lflags &= ~FG_WINSMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2256
2257 /*
2258 * fg_removeuipc_mark
2259 *
2260 * Description: Mark the fileglob for removal from message queue if needed
2261 * Also releases fileglob message queue reference
2262 *
2263 * Parameters: fg Fileglob pointer to remove
2264 *
2265 * Returns: true, if the fileglob needs to be removed from msg queue
2266 *
2267 * Locks: Takes and drops fg_lock, potentially many times
2268 */
static boolean_t
fg_removeuipc_mark(struct fileglob * fg)
{
	boolean_t remove = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If a concurrent insertion of this fileglob onto the message
	 * queue is still pending (FG_INSMSGQ), wait for it to complete
	 * so queue membership stays consistent.
	 */
	while (fg->fg_lflags & FG_INSMSGQ) {
		/* msleep() needs a fully-held (non-spin) mutex. */
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WINSMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
	}
	fg->fg_msgcount--;
	/* Last in-flight reference: caller must unlink fg from the msg queue. */
	if (fg->fg_msgcount == 0) {
		fg->fg_lflags |= FG_RMMSGQ;
		remove = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return remove;
}
2289
2290 /*
2291 * fg_removeuipc
2292 *
2293 * Description: Remove marked fileglob from message queue
2294 *
2295 * Parameters: fg Fileglob pointer to remove
2296 *
2297 * Returns: void
2298 *
2299 * Locks: Takes and drops fg_lock & uipc_lock
2300 * DO NOT call this function with proc_fdlock held as unp_gc()
2301 * can potentially try to acquire proc_fdlock, which can result
2302 * in a deadlock.
2303 */
static void
fg_removeuipc(struct fileglob * fg)
{
	/* Only act if fg_removeuipc_mark() flagged this fileglob. */
	if (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_REMOVE(fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_RMMSGQ;
		/* Wake any thread waiting in fg_insertuipc_mark(). */
		if (fg->fg_lflags & FG_WRMMSGQ) {
			fg->fg_lflags &= ~FG_WRMMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2320
2321 /*
2322 * Returns: 0 Success
2323 * EMSGSIZE The new fd's will not fit
2324 * ENOBUFS Cannot alloc struct fileproc
2325 */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	/* Number of rights (fileglob pointers) packed in this cmsg. */
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds;
	int error = 0;

	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 * Allocate all the fds, and if it doesn't fit,
	 * then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Roll back the fds allocated so far. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 * At this point we are commited, and can't fail anymore.
	 * Allocate all the fileprocs, and remove the files
	 * from the queue.
	 *
	 * Until we call procfdtbl_releasefd(), fds are in flux
	 * and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/* Drop in-flight accounting now that the fd lands in a table. */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 * Return the fds into `cm`.
	 * Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Zero the tail left over from the wider fileglob pointers. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* On failure the rights are lost: discard every fileglob. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2416
/*
 * Initialize the global unix domain socket pcb lists.
 */
void
unp_init(void)
{
	/* A full mbuf cluster's worth of fds must fit in one cmsg. */
	_CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);
}
2424
2425 #ifndef MIN
2426 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
2427 #endif
2428
2429 /*
2430 * Returns: 0 Success
2431 * EINVAL
2432 * EBADF
2433 */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* One bit per fd: set when fg_insertuipc_mark() flagged its fileglob. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/*
	 * Pass 1: validate every fd before touching anything, so a bad fd
	 * fails the whole message without leaving partial side effects.
	 */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/* Pass 3: link newly-marked fileglobs onto the message queue. */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2495
/*
 * unp_gc
 *
 * Description: Mark-and-sweep collector for file descriptors in transit
 *	in SCM_RIGHTS messages.  Fileglobs on unp_msghead that are still
 *	reachable from outside (or from a marked socket's receive buffer)
 *	get FMARK'ed; whatever remains unmarked with all of its references
 *	held by in-flight messages belongs to an unreachable cycle, so its
 *	socket is flushed and the extra reference dropped.
 *
 * Locks: Takes and drops uipc_lock, per-fileglob fg_lock, socket locks
 *	and receive socket-buffer locks; restarts from scratch whenever a
 *	socket (buffer) lock cannot be acquired without blocking.
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer' in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			socket_unlock(so, 0);
			/* Mark every fileglob queued in this socket's rcv buffer. */
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor. Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow. After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference. This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	nunref = 0;
	fpp = extra_ref;
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				socket_lock(so, 0);
				/* Flush pending data and any rights it carries. */
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Drop the extra references taken above; the last drop closes the file. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(PROC_NULL, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2694
2695 void
unp_dispose(struct mbuf * m)2696 unp_dispose(struct mbuf *m)
2697 {
2698 if (m) {
2699 unp_scan(m, unp_discard, NULL);
2700 }
2701 }
2702
2703 /*
2704 * Returns: 0 Success
2705 */
2706 static int
unp_listen(struct unpcb * unp,proc_t p)2707 unp_listen(struct unpcb *unp, proc_t p)
2708 {
2709 kauth_cred_t safecred = kauth_cred_proc_ref(p);
2710 cru2x(safecred, &unp->unp_peercred);
2711 kauth_cred_unref(&safecred);
2712 unp->unp_flags |= UNP_HAVEPCCACHED;
2713 return 0;
2714 }
2715
/*
 * Walk a list of mbuf chains (linked via m_act) and invoke `op' on every
 * fileglob pointer found in SCM_RIGHTS control messages.  Callers pass
 * unp_mark (from unp_gc) or unp_discard (from unp_dispose) as `op'.
 */
static void
unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
{
	struct mbuf *m;
	struct fileglob **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    (size_t)m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS) {
					continue;
				}
				/* In transit, the cmsg payload holds fileglob pointers. */
				qfds = (cm->cmsg_len - sizeof(*cm)) /
				    sizeof(int);
				rp = (struct fileglob **)(cm + 1);
				for (i = 0; i < qfds; i++) {
					(*op)(*rp++, arg);
				}
				break;          /* XXX, but saves time */
			}
		}
		m0 = m0->m_act;
	}
}
2746
/*
 * unp_scan() callback for unp_gc(): atomically mark a fileglob as
 * reachable (FMARK) and deferred (FDEFER) so the GC loop re-examines
 * the socket it references.  No-op if it is already marked.
 */
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
			/* Already marked reachable; nothing more to do. */
			os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	unp_defer++;
}
2761
/*
 * Release one in-flight reference to a fileglob carried in an
 * SCM_RIGHTS message that will not be delivered: fix up the message
 * queue membership and the global rights counter, then drop the
 * fileglob reference taken at internalize time.
 */
static void
unp_discard(struct fileglob *fg, void *p)
{
	if (p == NULL) {
		p = current_proc();             /* XXX */
	}
	(void) OSAddAtomic(1, &unp_disposed);
	if (fg_removeuipc_mark(fg)) {
		VERIFY(fg->fg_lflags & FG_RMMSGQ);
		fg_removeuipc(fg);
	}
	(void) OSAddAtomic(-1, &unp_rights);

	(void) fg_drop(p, fg);
}
2777
/*
 * Socket lock function for unix domain sockets: acquires the per-pcb
 * mutex and, when `refcount' is set, takes a use-count reference.
 * `lr' is the caller's return address for lock debugging (0 = use ours).
 */
int
unp_lock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved;
	if (lr == 0) {
		lr_saved = (void *) __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (so->so_pcb) {
		lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
	} else {
		panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
		    so, lr_saved, so->so_usecount);
	}

	if (so->so_usecount < 0) {
		panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
		    so, so->so_pcb, lr_saved, so->so_usecount);
	}

	if (refcount) {
		VERIFY(so->so_usecount > 0);
		so->so_usecount++;
	}
	/* Record the acquisition in the socket's lock-debug ring buffer. */
	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
	return 0;
}
2808
/*
 * Socket unlock function for unix domain sockets: drops an optional
 * use-count reference and releases the per-pcb mutex.  When the last
 * use count goes away with SOF_PCBCLEARING set, frees the socket's
 * last reference, its address, and the pcb, then schedules the
 * in-flight fd garbage collector.
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp = sotounpcb(so);

	if (lr == 0) {
		lr_saved = (void *) __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* Record the release in the socket's lock-debug ring buffer. */
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		/* Last reference: tear down the socket and its pcb. */
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		/* Kick a GC pass to reap fds stranded in flight. */
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2856
2857 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2858 unp_getlock(struct socket *so, __unused int flags)
2859 {
2860 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2861
2862
2863 if (so->so_pcb) {
2864 if (so->so_usecount < 0) {
2865 panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2866 }
2867 return &unp->unp_mtx;
2868 } else {
2869 panic("unp_getlock: so=%p NULL so_pcb", so);
2870 return so->so_proto->pr_domain->dom_mtx;
2871 }
2872 }
2873