1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97
98 #include <net/sockaddr_utils.h>
99
100 #if __has_ptrcheck
101 #include <machine/trap.h>
102 #endif /* __has_ptrcheck */
103
104 #if CONFIG_MACF
105 #include <security/mac_framework.h>
106 #endif /* CONFIG_MACF */
107
108 #include <mach/vm_param.h>
109
110 #ifndef ROUNDUP64
111 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
112 #endif
113
114 #ifndef ADVANCE64
115 #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
116 #endif
117
118 /*
119 * Maximum number of FDs that can be passed in an mbuf
120 */
121 #define UIPC_MAX_CMSG_FD 512
122
123 ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
124 static unp_gen_t unp_gencnt;
125 static u_int unp_count;
126
127 static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
128 static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
129 static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
130
131 static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
132 static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
133 static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
134
135 static u_int disconnect_in_progress;
136
137 static struct unp_head unp_shead, unp_dhead;
138 static int unp_defer;
139 static thread_call_t unp_gc_tcall;
140 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
141
142 SYSCTL_DECL(_net_local);
143
144 static int unp_rights; /* file descriptors in flight */
145 static int unp_disposed; /* discarded file descriptors */
146
147 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
148
149 #define ULEF_CONNECTION 0x01
150 uint32_t unp_log_enable_flags = 0;
151
152 SYSCTL_UINT(_net_local, OID_AUTO, log, CTLFLAG_RD | CTLFLAG_LOCKED,
153 &unp_log_enable_flags, 0, "");
154
155
156 /*
157 * mDNSResponder tracing. When enabled, endpoints connected to
158 * /var/run/mDNSResponder will be traced; during each send on
159 * the traced socket, we log the PID and process name of the
160 * sending process. We also print out a bit of info related
161 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
162 * of mDNSResponder stays the same.
163 */
164 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
165
166 static int unpst_tracemdns; /* enable tracing */
167
168 #define MDNS_IPC_MSG_HDR_VERSION_1 1
169
170 struct mdns_ipc_msg_hdr {
171 uint32_t version;
172 uint32_t datalen;
173 uint32_t ipc_flags;
174 uint32_t op;
175 union {
176 void *context;
177 uint32_t u32[2];
178 } __attribute__((packed));
179 uint32_t reg_index;
180 } __attribute__((packed));
181
182 /*
183 * Unix communications domain.
184 *
185 * TODO:
186 * SEQPACKET, RDM
187 * rethink name space problems
188 * need a proper out-of-band
189 * lock pushdown
190 */
191 static struct sockaddr sun_noname = {
192 .sa_len = sizeof(struct sockaddr),
193 .sa_family = AF_LOCAL,
194 .sa_data = {
195 0, 0, 0, 0, 0, 0, 0,
196 0, 0, 0, 0, 0, 0, 0
197 }
198 };
199
200 static ino_t unp_ino; /* prototype for fake inode numbers */
201
202 static int unp_attach(struct socket *);
203 static void unp_detach(struct unpcb *);
204 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
205 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
206 static void unp_disconnect(struct unpcb *);
207 static void unp_shutdown(struct unpcb *);
208 static void unp_drop(struct unpcb *, int);
209 static void unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
210 static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
211 static void unp_mark(struct fileglob *, __unused void *);
212 static void unp_discard(struct fileglob *, void *);
213 static int unp_internalize(struct mbuf *, proc_t);
214 static int unp_listen(struct unpcb *, proc_t);
215 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
216 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
217
__startup_func
static void
unp_gc_setup(void)
{
	/*
	 * Pre-allocate the thread call used to schedule unp_gc() so no
	 * allocation is needed at the point a GC pass is requested.
	 * THREAD_CALL_OPTIONS_ONCE: at most one pending invocation is
	 * kept queued at a time.
	 */
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
/* Registered to run during startup once thread calls are available. */
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
227
/*
 * Acquire the locks of a connected socket pair in a canonical
 * (pointer-address) order to avoid deadlock with another thread
 * locking the same pair from the opposite side.
 *
 * Precondition: caller holds the lock on 'so'.
 * Postcondition: both 'so' and 'conn_so' are locked ('conn_so' with
 * a reference taken via socket_lock(..., 1)).
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		/* Already in canonical order: just take the peer's lock. */
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		/*
		 * We must drop 'so' and re-acquire in order.  While 'so'
		 * is unlocked, UNP_DONTDISCONNECT tells a concurrent
		 * disconnect to hold off; rw_thrcount counts how many
		 * threads are inside this unlocked window.
		 */
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			/* Last thread out: allow disconnects again and wake waiters. */
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
249
250 static int
uipc_abort(struct socket * so)251 uipc_abort(struct socket *so)
252 {
253 struct unpcb *unp = sotounpcb(so);
254
255 if (unp == 0) {
256 return EINVAL;
257 }
258 unp_drop(unp, ECONNABORTED);
259 unp_detach(unp);
260 sofree(so);
261 return 0;
262 }
263
264 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)265 uipc_accept(struct socket *so, struct sockaddr **nam)
266 {
267 struct unpcb *unp = sotounpcb(so);
268
269 if (unp == 0) {
270 return EINVAL;
271 }
272
273 /*
274 * Pass back name of connected socket,
275 * if it was bound and we are still connected
276 * (our peer may have closed already!).
277 */
278 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
279 *nam = dup_sockaddr((struct sockaddr *)
280 unp->unp_conn->unp_addr, 1);
281 } else {
282 if (unp_log_enable_flags & ULEF_CONNECTION) {
283 os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
284 __func__, unp->unp_gencnt);
285 }
286 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
287 }
288 return 0;
289 }
290
291 /*
292 * Returns: 0 Success
293 * EISCONN
294 * unp_attach:
295 */
296 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)297 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
298 {
299 struct unpcb *unp = sotounpcb(so);
300
301 if (unp != 0) {
302 return EISCONN;
303 }
304 return unp_attach(so);
305 }
306
307 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)308 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
309 {
310 struct unpcb *unp = sotounpcb(so);
311
312 if (unp == 0) {
313 return EINVAL;
314 }
315
316 return unp_bind(unp, nam, p);
317 }
318
319 /*
320 * Returns: 0 Success
321 * EINVAL
322 * unp_connect:??? [See elsewhere in this file]
323 */
324 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)325 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
326 {
327 struct unpcb *unp = sotounpcb(so);
328
329 if (unp == 0) {
330 return EINVAL;
331 }
332 return unp_connect(so, nam, p);
333 }
334
335 /*
336 * Returns: 0 Success
337 * EINVAL
338 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
339 * unp_connect2:EINVAL Invalid argument
340 */
341 static int
uipc_connect2(struct socket * so1,struct socket * so2)342 uipc_connect2(struct socket *so1, struct socket *so2)
343 {
344 struct unpcb *unp = sotounpcb(so1);
345
346 if (unp == 0) {
347 return EINVAL;
348 }
349
350 return unp_connect2(so1, so2);
351 }
352
353 /* control is EOPNOTSUPP */
354
355 static int
uipc_detach(struct socket * so)356 uipc_detach(struct socket *so)
357 {
358 struct unpcb *unp = sotounpcb(so);
359
360 if (unp == 0) {
361 return EINVAL;
362 }
363
364 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
365 unp_detach(unp);
366 return 0;
367 }
368
369 static int
uipc_disconnect(struct socket * so)370 uipc_disconnect(struct socket *so)
371 {
372 struct unpcb *unp = sotounpcb(so);
373
374 if (unp == 0) {
375 return EINVAL;
376 }
377 unp_disconnect(unp);
378 return 0;
379 }
380
381 /*
382 * Returns: 0 Success
383 * EINVAL
384 */
385 static int
uipc_listen(struct socket * so,__unused proc_t p)386 uipc_listen(struct socket *so, __unused proc_t p)
387 {
388 struct unpcb *unp = sotounpcb(so);
389
390 if (unp == 0 || unp->unp_vnode == 0) {
391 return EINVAL;
392 }
393 return unp_listen(unp, p);
394 }
395
/*
 * Return the bound address of the connected peer, or the anonymous
 * local name if there is no peer / the peer is unbound.  The peer
 * socket is locked (in canonical order) while its address is copied
 * so it cannot be torn down underneath us.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
	so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
	if (so2 != NULL) {
		/* May temporarily drop 'so' to take both locks in order. */
		unp_get_locks_in_order(so, so2);
	}

	/* Re-check unp_conn: it may have changed while 'so' was unlocked. */
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr((struct sockaddr *)
		    unp->unp_conn->unp_addr, 1);
	} else {
		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
	}
	if (so2 != NULL) {
		socket_unlock(so2, 1);
	}
	return 0;
}
421
/*
 * Receive-done notification: the reader consumed data, so give space
 * back to the writer.  Only meaningful for SOCK_STREAM, where flow
 * control is implemented by adjusting the *sender's* buffer limits
 * to mirror what remains queued on our receive buffer.
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		return EINVAL;
	}
	switch (so->so_type) {
	case SOCK_DGRAM:
		/* Datagram sockets never register rcvd callbacks. */
		panic("uipc_rcvd DGRAM?");
	/*NOTREACHED*/

	case SOCK_STREAM:
		/* Shorthands: rcv = our receive buffer, snd = peer's send buffer. */
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 *
		 * unp_mbcnt/unp_cc cache the counts from the last
		 * adjustment; the deltas are credited back to the
		 * peer's sb_mbmax/sb_hiwat as we drain.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
468
469 /* pru_rcvoob is EOPNOTSUPP */
470
471 /*
472 * Returns: 0 Success
473 * EINVAL
474 * EOPNOTSUPP
475 * EPIPE
476 * ENOTCONN
477 * EISCONN
478 * unp_internalize:EINVAL
479 * unp_internalize:EBADF
480 * unp_connect:EAFNOSUPPORT Address family not supported
481 * unp_connect:EINVAL Invalid argument
482 * unp_connect:ENOTSOCK Not a socket
483 * unp_connect:ECONNREFUSED Connection refused
484 * unp_connect:EISCONN Socket is connected
485 * unp_connect:EPROTOTYPE Protocol wrong type for socket
486 * unp_connect:???
487 * sbappendaddr:ENOBUFS [5th argument, contents modified]
488 * sbappendaddr:??? [whatever a filter author chooses]
489 */
/*
 * pru_send for PF_LOCAL.  On success, ownership of 'm' and 'control'
 * passes to the receiving socket buffer (both are set NULL here);
 * anything still non-NULL at 'release' is freed before returning.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int32_t len = m_pktlen(m);

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		/* Out-of-band data is not supported on local sockets. */
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		/* Convert user fds in the control message into fileglobs. */
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* sendto(2): temporary connect for this send only. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/* so == so2 when a socket sends to itself; lock held already. */
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = (struct sockaddr *)unp->unp_addr;
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/* 'm' was consumed (or freed) by sbappendaddr(). */
		m = NULL;
		if (nam) {
			/* Undo the temporary sendto(2) connect. */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
		/* Shorthands: rcv = peer's receive buffer, snd = our send buffer. */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break;  /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		if (unp->unp_flags & UNP_TRACE_MDNS) {
			/* Trace sends to /var/run/mDNSResponder (see unpst_tracemdns). */
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure. Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full. It is
		 * possible, however, that append might fail. In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		/* Mirror the queued deltas onto our send-side limits
		 * (reversed by uipc_rcvd() as the peer drains). */
		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			/* Clamp: hiwat is unsigned, don't let it wrap. */
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	so->so_tc_stats[0].txpackets += 1;
	so->so_tc_stats[0].txbytes += len;

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0) {
		/* Undo unp_internalize() on the failed control message. */
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
708
709 static int
uipc_sense(struct socket * so,void * ub,int isstat64)710 uipc_sense(struct socket *so, void *ub, int isstat64)
711 {
712 struct unpcb *unp = sotounpcb(so);
713 struct socket *so2;
714 blksize_t blksize;
715
716 if (unp == 0) {
717 return EINVAL;
718 }
719
720 blksize = so->so_snd.sb_hiwat;
721 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
722 so2 = unp->unp_conn->unp_socket;
723 blksize += so2->so_rcv.sb_cc;
724 }
725 if (unp->unp_ino == 0) {
726 unp->unp_ino = unp_ino++;
727 }
728
729 if (isstat64 != 0) {
730 struct stat64 *sb64;
731
732 sb64 = (struct stat64 *)ub;
733 sb64->st_blksize = blksize;
734 sb64->st_dev = NODEV;
735 sb64->st_ino = (ino64_t)unp->unp_ino;
736 } else {
737 struct stat *sb;
738
739 sb = (struct stat *)ub;
740 sb->st_blksize = blksize;
741 sb->st_dev = NODEV;
742 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
743 }
744
745 return 0;
746 }
747
748 /*
749 * Returns: 0 Success
750 * EINVAL
751 *
752 * Notes: This is not strictly correct, as unp_shutdown() also calls
753 * socantrcvmore(). These should maybe both be conditionalized
754 * on the 'how' argument in soshutdown() as called from the
755 * shutdown() system call.
756 */
757 static int
uipc_shutdown(struct socket * so)758 uipc_shutdown(struct socket *so)
759 {
760 struct unpcb *unp = sotounpcb(so);
761
762 if (unp == 0) {
763 return EINVAL;
764 }
765 socantsendmore(so);
766 unp_shutdown(unp);
767 return 0;
768 }
769
770 /*
771 * Returns: 0 Success
772 * EINVAL Invalid argument
773 */
774 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)775 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
776 {
777 struct unpcb *unp = sotounpcb(so);
778
779 if (unp == NULL) {
780 return EINVAL;
781 }
782 if (unp->unp_addr != NULL) {
783 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
784 } else {
785 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
786 }
787 return 0;
788 }
789
/*
 * Protocol user-request switch for PF_LOCAL sockets: maps the generic
 * socket-layer operations onto the uipc_* handlers above.  Send and
 * receive use the generic sosend()/soreceive() paths.
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort =            uipc_abort,
	.pru_accept =           uipc_accept,
	.pru_attach =           uipc_attach,
	.pru_bind =             uipc_bind,
	.pru_connect =          uipc_connect,
	.pru_connect2 =         uipc_connect2,
	.pru_detach =           uipc_detach,
	.pru_disconnect =       uipc_disconnect,
	.pru_listen =           uipc_listen,
	.pru_peeraddr =         uipc_peeraddr,
	.pru_rcvd =             uipc_rcvd,
	.pru_send =             uipc_send,
	.pru_sense =            uipc_sense,
	.pru_shutdown =         uipc_shutdown,
	.pru_sockaddr =         uipc_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
809
810 int
uipc_ctloutput(struct socket * so,struct sockopt * sopt)811 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
812 {
813 struct unpcb *unp = sotounpcb(so);
814 int error = 0;
815 pid_t peerpid;
816 proc_t p;
817 task_t t __single;
818 struct socket *peerso;
819
820 switch (sopt->sopt_dir) {
821 case SOPT_GET:
822 switch (sopt->sopt_name) {
823 case LOCAL_PEERCRED:
824 if (unp->unp_flags & UNP_HAVEPC) {
825 error = sooptcopyout(sopt, &unp->unp_peercred,
826 sizeof(unp->unp_peercred));
827 } else {
828 if (so->so_type == SOCK_STREAM) {
829 error = ENOTCONN;
830 } else {
831 error = EINVAL;
832 }
833 }
834 break;
835 case LOCAL_PEERPID:
836 case LOCAL_PEEREPID:
837 if (unp->unp_conn == NULL) {
838 error = ENOTCONN;
839 break;
840 }
841 peerso = unp->unp_conn->unp_socket;
842 if (peerso == NULL) {
843 panic("peer is connected but has no socket?");
844 }
845 unp_get_locks_in_order(so, peerso);
846 if (sopt->sopt_name == LOCAL_PEEREPID &&
847 peerso->so_flags & SOF_DELEGATED) {
848 peerpid = peerso->e_pid;
849 } else {
850 peerpid = peerso->last_pid;
851 }
852 socket_unlock(peerso, 1);
853 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
854 break;
855 case LOCAL_PEERUUID:
856 case LOCAL_PEEREUUID:
857 if (unp->unp_conn == NULL) {
858 error = ENOTCONN;
859 break;
860 }
861 peerso = unp->unp_conn->unp_socket;
862 if (peerso == NULL) {
863 panic("peer is connected but has no socket?");
864 }
865 unp_get_locks_in_order(so, peerso);
866 if (sopt->sopt_name == LOCAL_PEEREUUID &&
867 peerso->so_flags & SOF_DELEGATED) {
868 error = sooptcopyout(sopt, &peerso->e_uuid,
869 sizeof(peerso->e_uuid));
870 } else {
871 error = sooptcopyout(sopt, &peerso->last_uuid,
872 sizeof(peerso->last_uuid));
873 }
874 socket_unlock(peerso, 1);
875 break;
876 case LOCAL_PEERTOKEN:
877 if (unp->unp_conn == NULL) {
878 error = ENOTCONN;
879 break;
880 }
881 peerso = unp->unp_conn->unp_socket;
882 if (peerso == NULL) {
883 panic("peer is connected but has no socket?");
884 }
885 unp_get_locks_in_order(so, peerso);
886 peerpid = peerso->last_pid;
887 p = proc_find(peerpid);
888 if (p != PROC_NULL) {
889 t = proc_task(p);
890 if (t != TASK_NULL) {
891 audit_token_t peertoken;
892 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
893 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
894 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
895 } else {
896 error = EINVAL;
897 }
898 } else {
899 error = EINVAL;
900 }
901 proc_rele(p);
902 } else {
903 error = EINVAL;
904 }
905 socket_unlock(peerso, 1);
906 break;
907 default:
908 error = EOPNOTSUPP;
909 break;
910 }
911 break;
912 case SOPT_SET:
913 default:
914 error = EOPNOTSUPP;
915 break;
916 }
917
918 return error;
919 }
920
921 /*
922 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
923 * for stream sockets, although the total for sender and receiver is
924 * actually only PIPSIZ.
925 * Datagram sockets really use the sendspace as the maximum datagram size,
926 * and don't really want to reserve the sendspace. Their recvspace should
927 * be large enough for at least one max-size datagram plus address.
928 */
929 #ifndef PIPSIZ
930 #define PIPSIZ 8192
931 #endif
932 static u_int32_t unpst_sendspace = PIPSIZ;
933 static u_int32_t unpst_recvspace = PIPSIZ;
934 static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
935 static u_int32_t unpdg_recvspace = 4 * 1024;
936
937 SYSCTL_DECL(_net_local_stream);
938 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
939 &unpst_sendspace, 0, "");
940 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
941 &unpst_recvspace, 0, "");
942 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
943 &unpst_tracemdns, 0, "");
944 SYSCTL_DECL(_net_local_dgram);
945 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
946 &unpdg_sendspace, 0, "");
947 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
948 &unpdg_recvspace, 0, "");
949
950 /*
951 * Returns: 0 Success
952 * ENOBUFS
953 * soreserve:ENOBUFS
954 */
/*
 * Allocate and initialize a unpcb for a new PF_LOCAL socket: reserve
 * default buffer space (if the caller hasn't already), link the pcb
 * onto the global stream/datagram list, and mark the socket buffers
 * SB_UNIX.
 *
 * Returns: 0 Success
 *          ENOBUFS (via soreserve())
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			/*
			 * By default soreserve() will set the low water
			 * mark to MCLBYTES which is too high given our
			 * default sendspace. Override it here to something
			 * sensible.
			 */
			so->so_snd.sb_lowat = 1;
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	/* Z_NOFAIL: allocation cannot fail, no NULL check needed. */
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Publish the pcb on the global list under the list rwlock. */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
1020
/*
 * Tear down a unpcb: unlink it from the global list, detach any bound
 * vnode, disconnect from the peer, reset all datagram senders still
 * referencing us, and mark the socket for pcb deallocation.
 * Called with the socket locked.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	/* Remove from the global pcb list first. */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: unp_vnode may have been cleared while unlocked. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp);        /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	/* Drop every datagram sender still connected to us, one at a time. */
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		/* Wait until no disconnect is in flight, then claim the slot. */
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	/* Re-acquire our own lock if the loop above dropped it. */
	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1100
1101 /*
1102 * Returns: 0 Success
1103 * EAFNOSUPPORT
1104 * EINVAL
1105 * EADDRINUSE
1106 * namei:??? [anything namei can return]
1107 * vnode_authorize:??? [anything vnode_authorize can return]
1108 *
1109 * Notes: p at this point is the current process, as this function is
1110 * only called by sobind().
1111 */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp __single, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	/*
	 * Check if the socket is already bound to an address
	 */
	if (unp->unp_vnode != NULL) {
		return EINVAL;
	}
	/*
	 * Check if the socket may have been shut down
	 */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		return EINVAL;
	}

	/* Length of the path component only; sun_len covers the whole sockaddr */
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (namelen >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Make a NUL-terminated local copy for namei(); bounds checked above */
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

	/*
	 * Drop the socket lock across the filesystem operations below
	 * (namei/vn_create can block); state is re-validated after relock.
	 */
	socket_unlock(so, 0);

	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		socket_lock(so, 0);
		/* A node already exists at that path */
		return EADDRINUSE;
	}

	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	/* New node's mode honors the process umask */
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));

	/*
	 * MAC checks and vnode_authorize() chain via the dangling
	 * "if (error == 0)" statements: each subsequent check runs
	 * only if the previous one succeeded.
	 */
#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* MAC_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return error;
	}

	socket_lock(so, 0);

	/*
	 * Re-check: another thread may have bound this socket while
	 * the socket lock was dropped for the filesystem operations.
	 */
	if (unp->unp_vnode != NULL) {
		vnode_put(vp);	/* drop the iocount */
		return EINVAL;
	}

	error = vnode_ref(vp);	/* gain a longterm reference */
	if (error) {
		vnode_put(vp);	/* drop the iocount */
		return error;
	}

	/* Cross-link the vnode and the socket, record the bound address */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
	vnode_put(vp);	/* drop the iocount; the vnode_ref above persists */

	return 0;
}
1237
1238
1239 /*
1240 * Returns: 0 Success
1241 * EAFNOSUPPORT Address family not supported
1242 * EINVAL Invalid argument
1243 * ENOTSOCK Not a socket
1244 * ECONNREFUSED Connection refused
1245 * EPROTOTYPE Protocol wrong type for socket
1246 * EISCONN Socket is connected
1247 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1248 * unp_connect2:EINVAL Invalid argument
1249 * namei:??? [anything namei can return]
1250 * vnode_authorize:???? [anything vnode_authorize can return]
1251 *
1252 * Notes: p at this point is the current process, as this function is
1253 * only called by sosend(), sendfile(), and soconnectlock().
1254 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3, *list_so = NULL;
	struct unpcb *unp, *unp2, *unp3;
	vfs_context_t ctx = vfs_context_current();
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	unp = sotounpcb(so);
	so2 = so3 = NULL;

	/* Length of the path component only; sa_len covers the whole sockaddr */
	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (len >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}

	soisconnecting(so);

	/* NUL-terminated local copy for namei(); bounds checked above */
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	/* Drop the socket lock across namei/MAC/authorize, which can block */
	socket_unlock(so, 0);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	nameidone(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		socket_lock(so, 0);
		goto out;
	}

#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_connect(ctx, vp, so);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	/* Connecting requires write access to the socket node */
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}

	/* unp_connect_lock stabilizes vp->v_socket for the lookup below */
	lck_mtx_lock(&unp_connect_lock);

	if (vp->v_socket == 0) {
		lck_mtx_unlock(&unp_connect_lock);
		/* No socket is currently bound at this path */
		error = ECONNREFUSED;
		socket_lock(so, 0);
		goto out;
	}

	socket_lock(vp->v_socket, 1);	/* Get a reference on the listening socket */
	so2 = vp->v_socket;
	lck_mtx_unlock(&unp_connect_lock);


	if (so2->so_pcb == NULL) {
		/* Target socket has been torn down */
		error = ECONNREFUSED;
		if (so != so2) {
			socket_unlock(so2, 1);
			socket_lock(so, 0);
		} else {
			/* Release the reference held for the listen socket */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
		goto out;
	}

	/*
	 * Acquire both socket locks in address order to avoid deadlock.
	 * (If so == so2 we already hold the single lock.)
	 */
	if (so < so2) {
		socket_unlock(so2, 0);
		socket_lock(so, 0);
		socket_lock(so2, 0);
	} else if (so > so2) {
		socket_lock(so, 0);
	}
	/*
	 * Check if socket was connected while we were trying to
	 * get the socket locks in order.
	 * XXX - probably shouldn't return an error for SOCK_DGRAM
	 */
	if ((so->so_state & SS_ISCONNECTED) != 0) {
		error = EISCONN;
		goto decref_out;
	}

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto decref_out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Stream-style connect: spawn a fresh server-side socket
		 * (so3) off the listener via sonewconn().
		 */
		/* Release the incoming socket but keep a reference */
		socket_unlock(so, 0);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, nam)) == 0) {
			error = ECONNREFUSED;
			if (so != so2) {
				socket_unlock(so2, 1);
				socket_lock(so, 0);
			} else {
				socket_lock(so, 0);
				/* Release the reference held for
				 * listen socket.
				 */
				VERIFY(so2->so_usecount > 0);
				so2->so_usecount--;
			}
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		/* The new socket inherits the listener's bound address */
		if (unp2->unp_addr) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));

		/* Here we need to have both so and so2 locks and so2
		 * is already locked. Lock ordering is required.
		 */
		if (so < so2) {
			socket_unlock(so2, 0);
			socket_lock(so, 0);
			socket_lock(so2, 0);
		} else {
			socket_lock(so, 0);
		}

		/* Check again if the socket state changed when its lock was released */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			socket_unlock(so2, 1);
			/* Discard the never-connected so3 */
			socket_lock(so3, 0);
			sofreelastref(so3, 1);
			goto out;
		}
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		/* Hold the reference on listening socket until the end */
		socket_unlock(so2, 0);
		list_so = so2;

		/* Lock ordering doesn't matter because so3 was just created */
		socket_lock(so3, 1);
		/* From here on, so2 means the new server-side socket */
		so2 = so3;

		/*
		 * Enable tracing for mDNSResponder endpoints. (The use
		 * of sizeof instead of strlen below takes the null
		 * terminating character into account.)
		 */
		if (unpst_tracemdns &&
		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
		    sizeof(MDNSRESPONDER_PATH))) {
			unp->unp_flags |= UNP_TRACE_MDNS;
			unp2->unp_flags |= UNP_TRACE_MDNS;
		}
	}

	/* Wire the two pcbs together; both sockets are locked here */
	error = unp_connect2(so, so2);

decref_out:
	if (so2 != NULL) {
		if (so != so2) {
			socket_unlock(so2, 1);
		} else {
			/* Release the extra reference held for the listen socket.
			 * This is possible only for SOCK_DGRAM sockets. We refuse
			 * connecting to the same socket for SOCK_STREAM sockets.
			 */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
	}

	if (list_so != NULL) {
		/* Drop the reference kept on the listening socket */
		socket_lock(list_so, 0);
		socket_unlock(list_so, 1);
	}

out:
	/* Caller expects so's lock held on return, success or failure */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	vnode_put(vp);
	return error;
}
1486
1487 /*
1488 * Returns: 0 Success
1489 * EPROTOTYPE Protocol wrong type for socket
1490 * EINVAL Invalid argument
1491 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	if (so2->so_type != so->so_type) {
		return EPROTOTYPE;
	}

	unp2 = sotounpcb(so2);

	/* Both socket locks must be held on entry (and on return) */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Verify both sockets are still opened */
	if (unp == 0 || unp2 == 0) {
		return EINVAL;
	}

	/* The connection holds a use count on the peer socket */
	unp->unp_conn = unp2;
	so2->so_usecount++;

	switch (so->so_type) {
	case SOCK_DGRAM:
		/* Datagram peers track their connecters on unp_refs */
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);

		if (so != so2) {
			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
			/* Keep an extra reference on so2 that will be dropped
			 * soon after getting the locks in order
			 */
			socket_unlock(so2, 0);
			soisconnected(so);
			unp_get_locks_in_order(so, so2);
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		} else {
			/* Self-connect: only one lock involved, no reordering */
			soisconnected(so);
		}

		break;

	case SOCK_STREAM:
		/* This takes care of socketpair */
		if (!(unp->unp_flags & UNP_HAVEPC) &&
		    !(unp2->unp_flags & UNP_HAVEPC)) {
			/* Neither side has peer credentials yet: stamp both
			 * with the current thread's credentials.
			 */
			cru2x(kauth_cred_get(), &unp->unp_peercred);
			unp->unp_flags |= UNP_HAVEPC;

			cru2x(kauth_cred_get(), &unp2->unp_peercred);
			unp2->unp_flags |= UNP_HAVEPC;
		}
		/* Stream connections are symmetric: back-link and count */
		unp2->unp_conn = unp;
		so->so_usecount++;

		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
		socket_unlock(so, 0);
		soisconnected(so2);

		/* Keep an extra reference on so2, that will be dropped soon after
		 * getting the locks in order again.
		 */
		socket_unlock(so2, 0);

		socket_lock(so, 0);
		soisconnected(so);

		unp_get_locks_in_order(so, so2);
		/* Decrement the extra reference left before */
		VERIFY(so2->so_usecount > 0);
		so2->so_usecount--;
		break;

	default:
		panic("unknown socket type %d in unp_connect2", so->so_type);
	}
	/* Both locks are held again on exit, matching the entry contract */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return 0;
}
1573
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = NULL;
	struct socket *so2 = NULL, *so;
	struct socket *waitso;
	int so_locked = 1, strdisconn = 0;

	so = unp->unp_socket;
	if (unp->unp_conn == NULL) {
		return;
	}
	/*
	 * Serialize all unpcb disconnects through the global
	 * disconnect_in_progress flag; sleep (with the socket lock
	 * dropped) until any other disconnect completes.
	 */
	lck_mtx_lock(&unp_disconnect_lock);
	while (disconnect_in_progress != 0) {
		if (so_locked == 1) {
			socket_unlock(so, 0);
			so_locked = 0;
		}
		(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
		    PSOCK, "disconnect", NULL);
	}
	disconnect_in_progress = 1;
	lck_mtx_unlock(&unp_disconnect_lock);

	if (so_locked == 0) {
		socket_lock(so, 0);
		so_locked = 1;
	}

	unp2 = unp->unp_conn;

	if (unp2 == 0 || unp2->unp_socket == NULL) {
		goto out;
	}
	so2 = unp2->unp_socket;

try_again:
	/*
	 * Acquire both socket locks in address order (lower address
	 * first) to avoid deadlock; waitso is the socket whose pcb we
	 * may have to sleep on below.
	 */
	if (so == so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		waitso = so;
	} else if (so < so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		socket_lock(so2, 1);
		waitso = so2;
	} else {
		if (so_locked == 1) {
			socket_unlock(so, 0);
		}
		socket_lock(so2, 1);
		socket_lock(so, 0);
		waitso = so;
	}
	so_locked = 1;

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Check for the UNP_DONTDISCONNECT flag, if it
	 * is set, release both sockets and go to sleep
	 */

	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		so_locked = 0;

		/* PDROP: msleep releases unp->unp_mtx and does not retake it */
		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
		    PSOCK | PDROP, "unpdisconnect", NULL);
		goto try_again;
	}

	/* disconnect_in_progress excludes concurrent disconnects, so the
	 * peer link must still be intact here.
	 */
	if (unp->unp_conn == NULL) {
		panic("unp_conn became NULL after sleep");
	}

	/* Sever our side of the link and drop the use count it held */
	unp->unp_conn = NULL;
	VERIFY(so2->so_usecount > 0);
	so2->so_usecount--;

	if (unp->unp_flags & UNP_TRACE_MDNS) {
		unp->unp_flags &= ~UNP_TRACE_MDNS;
	}

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		/* Datagram: one-directional link; just leave the peer's ref list */
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		break;

	case SOCK_STREAM:
		/* Stream: symmetric link; sever the peer's side too */
		unp2->unp_conn = NULL;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;

		/*
		 * Set the socket state correctly but do a wakeup later when
		 * we release all locks except the socket lock, this will avoid
		 * a deadlock.
		 */
		unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		if (unp2->unp_flags & UNP_TRACE_MDNS) {
			unp2->unp_flags &= ~UNP_TRACE_MDNS;
		}

		/* Defer soisdisconnected() calls until after locks are shed */
		strdisconn = 1;
		break;
	default:
		panic("unknown socket type %d", so->so_type);
	}
out:
	/* Allow the next disconnect to proceed and wake any waiters */
	lck_mtx_lock(&unp_disconnect_lock);
	disconnect_in_progress = 0;
	wakeup(&disconnect_in_progress);
	lck_mtx_unlock(&unp_disconnect_lock);

	if (strdisconn) {
		/* Deliver the deferred disconnect notifications, one lock at a time */
		socket_unlock(so, 0);
		soisdisconnected(so2);
		socket_unlock(so2, 1);

		socket_lock(so, 0);
		soisdisconnected(so);
	}
	/* Contract: caller's socket lock is held on return */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return;
}
1713
1714 /*
1715 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1716 * The unpcb_compat data structure is passed to user space and must not change.
1717 */
static void
unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
{
	/*
	 * Every kernel pointer exported here is run through
	 * VM_KERNEL_ADDRPERM() so userspace never sees raw kernel
	 * addresses.  On LP64 the compat struct stores 32-bit handles;
	 * on 32-bit it keeps pointer-typed fields.
	 */
#if defined(__LP64__)
	cp->unp_link.le_next = (u_int32_t)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
	cp->unp_link.le_prev = (u_int32_t)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
#else
	cp->unp_link.le_next = (struct unpcb_compat *)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
	cp->unp_link.le_prev = (struct unpcb_compat **)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
#endif
	cp->unp_socket = (_UNPCB_PTR(struct socket *))
	    VM_KERNEL_ADDRPERM(up->unp_socket);
	cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
	    VM_KERNEL_ADDRPERM(up->unp_vnode);
	cp->unp_ino = up->unp_ino;
	cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
	    VM_KERNEL_ADDRPERM(up->unp_conn);
	cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
#if defined(__LP64__)
	cp->unp_reflink.le_next =
	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
	cp->unp_reflink.le_prev =
	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
#else
	cp->unp_reflink.le_next =
	    (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
	cp->unp_reflink.le_prev =
	    (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
#endif
	cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
	    VM_KERNEL_ADDRPERM(up->unp_addr);
	/* Plain counters/generation are copied through unchanged */
	cp->unp_cc = up->unp_cc;
	cp->unp_mbcnt = up->unp_mbcnt;
	cp->unp_gencnt = up->unp_gencnt;
}
1757
/*
 * sysctl handler: export the list of active local-domain PCBs (legacy
 * xunpcb format).  arg1 selects SOCK_DGRAM or SOCK_STREAM list.
 * Output layout: leading xunpgen header, one xunpcb per pcb, trailing
 * xunpgen so the caller can detect concurrent changes.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list __bidi_indexable;
	size_t unp_list_len;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	/* unp_list_mtx (read-held throughout) stabilizes the pcb lists */
	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size-probe only: report a generous estimate (n + n/8 slack) */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot pcbs no newer than the generation captured above */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;          /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRPERM(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				SOCKADDR_COPY(unp->unp_addr, &xu.xu_au);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				SOCKADDR_COPY(unp->unp_conn->unp_addr, &xu.xu_cau);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1873
/*
 * The socket type (SOCK_DGRAM/SOCK_STREAM) is smuggled to the sysctl
 * handlers through the opaque arg1 pointer; the handlers recover it
 * with (intptr_t)arg1.  __unsafe_forge_single satisfies pointer-bounds
 * checking for these non-pointer values.
 */
const caddr_t SYSCTL_SOCK_DGRAM_ARG = __unsafe_forge_single(caddr_t, SOCK_DGRAM);
const caddr_t SYSCTL_SOCK_STREAM_ARG = __unsafe_forge_single(caddr_t, SOCK_STREAM);

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1885
1886 #if XNU_TARGET_OS_OSX
1887
/*
 * sysctl handler: 64-bit variant of unp_pcblist(), exporting xunpcb64
 * records with 64-bit pointer handles.  Same two-phase protocol:
 * leading and trailing xunpgen headers bracket the pcb records.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size-probe only: report a generous estimate (n + n/8 slack) */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot pcbs no newer than the generation captured above */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;          /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			/* All kernel pointers are scrambled via VM_KERNEL_ADDRPERM */
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			/*
			 * NOTE(review): unlike unp_pcblist()/unp_pcblist_n(),
			 * which use SOCKADDR_COPY, these copy sun_len bytes
			 * directly; assumes sun_len never exceeds
			 * sizeof(xu.xu_au)/sizeof(xu.xu_cau) — verify.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
2029
/* 64-bit pcblist variants (macOS targets only; see XNU_TARGET_OS_OSX guard) */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
2038
2039 #endif /* XNU_TARGET_OS_OSX */
2040
/*
 * sysctl handler: "new"-format pcb list.  Each record is a packed run
 * of xunpcb_n + xsocket_n + two xsockbuf_n + xsockstat_n, each aligned
 * to 8 bytes (ROUNDUP64/ADVANCE64).  Unlike the other handlers, this
 * one streams records directly from the live list without snapshotting
 * into an intermediate array.
 */
static int
unp_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error = 0;
	int i, n;
	struct unpcb *unp;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;
	void *buf __single = NULL;
	/* One reusable scratch record holding all five sub-structures */
	size_t item_size = ROUNDUP64(sizeof(struct xunpcb_n)) +
	    ROUNDUP64(sizeof(struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
	    ROUNDUP64(sizeof(struct xsockstat_n));

	/* Z_NOFAIL: allocation cannot fail, no NULL check needed */
	buf = kalloc_data(item_size, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_rw_lock_shared(&unp_list_mtx);

	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size-probe only: report a generous estimate (n + n/8 slack) */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) * item_size;
		goto done;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error != 0) {
		goto done;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, unp = head->lh_first;
	    i < n && unp != NULL;
	    i++, unp = unp->unp_link.le_next) {
		/* Carve the five sub-records out of the scratch buffer */
		struct xunpcb_n *xu = (struct xunpcb_n *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xu, sizeof(*xu));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof(*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof(*xsbsnd));

		/* Skip pcbs created after our snapshot generation */
		if (unp->unp_gencnt > gencnt) {
			continue;
		}

		bzero(buf, item_size);

		xu->xunp_len = sizeof(struct xunpcb_n);
		xu->xunp_kind = XSO_UNPCB;
		/* All kernel pointers are scrambled via VM_KERNEL_ADDRPERM */
		xu->xunp_unpp = (uint64_t)VM_KERNEL_ADDRPERM(unp);
		xu->xunp_vnode = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_vnode);
		xu->xunp_ino = unp->unp_ino;
		xu->xunp_conn = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_conn);
		xu->xunp_refs = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
		xu->xunp_reflink = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
		xu->xunp_cc = unp->unp_cc;
		xu->xunp_mbcnt = unp->unp_mbcnt;
		xu->xunp_flags = unp->unp_flags;
		xu->xunp_gencnt = unp->unp_gencnt;

		if (unp->unp_addr) {
			SOCKADDR_COPY(unp->unp_addr, &xu->xu_au);
		}
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			SOCKADDR_COPY(unp->unp_conn->unp_addr, &xu->xu_cau);
		}
		sotoxsocket_n(unp->unp_socket, xso);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(unp->unp_socket, xsostats);

		error = SYSCTL_OUT(req, buf, item_size);
		if (error != 0) {
			break;
		}
	}
	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
done:
	lck_rw_done(&unp_list_mtx);
	kfree_data(buf, item_size);
	return error;
}
2171
/* "New"-format pcblist variants backed by unp_pcblist_n() above */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local stream sockets");
2180
2181 static void
unp_shutdown(struct unpcb * unp)2182 unp_shutdown(struct unpcb *unp)
2183 {
2184 struct socket *so = unp->unp_socket;
2185 struct socket *so2;
2186 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
2187 so2 = unp->unp_conn->unp_socket;
2188 unp_get_locks_in_order(so, so2);
2189 socantrcvmore(so2);
2190 socket_unlock(so2, 1);
2191 }
2192 }
2193
2194 static void
unp_drop(struct unpcb * unp,int errno)2195 unp_drop(struct unpcb *unp, int errno)
2196 {
2197 struct socket *so = unp->unp_socket;
2198
2199 so->so_error = (u_short)errno;
2200 unp_disconnect(unp);
2201 }
2202
2203 /*
2204 * fg_insertuipc_mark
2205 *
2206 * Description: Mark fileglob for insertion onto message queue if needed
2207 * Also takes fileglob reference
2208 *
2209 * Parameters: fg Fileglob pointer to insert
2210 *
2211 * Returns: true, if the fileglob needs to be inserted onto msg queue
2212 *
2213 * Locks: Takes and drops fg_lock, potentially many times
2214 */
static boolean_t
fg_insertuipc_mark(struct fileglob * fg)
{
	boolean_t insert = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If a removal of this fileglob from the message queue is in
	 * flight (FG_RMMSGQ), wait for it to finish before marking for
	 * insertion.  The spin lock must be converted to a full mutex
	 * before sleeping.
	 */
	while (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		/*
		 * FG_WRMMSGQ flags a waiter on removal completion;
		 * presumably woken by fg_removeuipc() — confirm (its
		 * body is elsewhere in this file).
		 */
		fg->fg_lflags |= FG_WRMMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
	}

	/* One fg reference per in-flight message carrying this fileglob */
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
	fg->fg_msgcount++;
	if (fg->fg_msgcount == 1) {
		/* First in-flight message: caller must queue it (FG_INSMSGQ) */
		fg->fg_lflags |= FG_INSMSGQ;
		insert = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return insert;
}
2237
2238 /*
2239 * fg_insertuipc
2240 *
2241 * Description: Insert marked fileglob onto message queue
2242 *
2243 * Parameters: fg Fileglob pointer to insert
2244 *
2245 * Returns: void
2246 *
2247 * Locks: Takes and drops fg_lock & uipc_lock
2248 * DO NOT call this function with proc_fdlock held as unp_gc()
2249 * can potentially try to acquire proc_fdlock, which can result
2250 * in a deadlock.
2251 */
static void
fg_insertuipc(struct fileglob * fg)
{
	/* Only fileglobs marked by fg_insertuipc_mark() need queueing. */
	if (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * Clear the pending-insert flag and wake anyone in
		 * fg_removeuipc_mark() waiting for the insert to finish.
		 */
		fg->fg_lflags &= ~FG_INSMSGQ;
		if (fg->fg_lflags & FG_WINSMSGQ) {
			fg->fg_lflags &= ~FG_WINSMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2268
2269 /*
2270 * fg_removeuipc_mark
2271 *
2272 * Description: Mark the fileglob for removal from message queue if needed
2273 * Also releases fileglob message queue reference
2274 *
2275 * Parameters: fg Fileglob pointer to remove
2276 *
2277 * Returns: true, if the fileglob needs to be removed from msg queue
2278 *
2279 * Locks: Takes and drops fg_lock, potentially many times
2280 */
static boolean_t
fg_removeuipc_mark(struct fileglob * fg)
{
	boolean_t remove = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If the fileglob is still flagged for insertion onto the message
	 * queue, wait for that insertion to complete before removing.
	 */
	while (fg->fg_lflags & FG_INSMSGQ) {
		/* msleep() requires a full (non-spin) mutex hold. */
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WINSMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
	}
	fg->fg_msgcount--;
	/*
	 * Last in-transit reference gone: mark it pending removal and tell
	 * the caller to dequeue it via fg_removeuipc().
	 */
	if (fg->fg_msgcount == 0) {
		fg->fg_lflags |= FG_RMMSGQ;
		remove = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return remove;
}
2301
2302 /*
2303 * fg_removeuipc
2304 *
2305 * Description: Remove marked fileglob from message queue
2306 *
2307 * Parameters: fg Fileglob pointer to remove
2308 *
2309 * Returns: void
2310 *
2311 * Locks: Takes and drops fg_lock & uipc_lock
2312 * DO NOT call this function with proc_fdlock held as unp_gc()
2313 * can potentially try to acquire proc_fdlock, which can result
2314 * in a deadlock.
2315 */
static void
fg_removeuipc(struct fileglob * fg)
{
	/* Only fileglobs marked by fg_removeuipc_mark() need dequeueing. */
	if (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_REMOVE(fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * Clear the pending-remove flag and wake anyone in
		 * fg_insertuipc_mark() waiting for the removal to finish.
		 */
		fg->fg_lflags &= ~FG_RMMSGQ;
		if (fg->fg_lflags & FG_WRMMSGQ) {
			fg->fg_lflags &= ~FG_WRMMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2332
2333 /*
2334 * Returns: 0 Success
2335 * EMSGSIZE The new fd's will not fit
2336 * ENOBUFS Cannot alloc struct fileproc
2337 */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	/* The control payload currently holds fileglob pointers, not fds. */
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds __bidi_indexable;
	int error = 0;

	/* Temporary array holding the newly allocated fd numbers. */
	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 * Allocate all the fds, and if it doesn't fit,
	 * then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Undo the fds already reserved. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 * At this point we are commited, and can't fail anymore.
	 * Allocate all the fileprocs, and remove the files
	 * from the queue.
	 *
	 * Until we call procfdtbl_releasefd(), fds are in flux
	 * and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/* Drop the in-transit accounting taken at internalize time. */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 * Return the fds into `cm`.
	 * Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Scrub the tail so no kernel pointers leak to userspace. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* Drop every in-transit fileglob reference on failure. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2428
void
unp_init(void)
{
	/*
	 * A single control mbuf cluster must be able to carry the maximum
	 * number of descriptors accepted in one SCM_RIGHTS message.
	 */
	_CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
	/* presumably the datagram and stream PCB lists — verify declarations */
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);
}
2436
#ifndef MIN
/* Classic unparenthesized-safe minimum; note arguments may be evaluated twice. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2440
2441 /*
2442 * Returns: 0 Success
2443 * EINVAL
2444 * EBADF
2445 */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* Bitmap recording which entries need fg_insertuipc() after unlock. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/*
	 * First pass: validate every fd before mutating anything, so we
	 * can bail with no cleanup required.
	 */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/*
	 * Queue the marked fileglobs outside proc_fdlock; fg_insertuipc()
	 * must not be called with proc_fdlock held (see its header comment).
	 */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2507
/*
 * unp_gc
 *
 * Description: Garbage-collect fileglobs that are only reachable through
 *		SCM_RIGHTS messages in transit (unreachable cycles of
 *		passed descriptors).  Runs a mark phase over unp_msghead
 *		(FMARK = externally accessible, FDEFER = revisit), then a
 *		sweep that flushes and drops anything left unmarked whose
 *		only references are message references.
 *
 * Locks:	Takes and drops uipc_lock, each fg_lock, and socket locks;
 *		restarts from scratch if a socket or its receive buffer
 *		cannot be locked without blocking.
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer' in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			/*
			 * Drop the socket lock while scanning; the sb lock
			 * keeps the receive buffer stable.
			 */
			socket_unlock(so, 0);
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor. Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow. After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference. This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	nunref = 0;
	fpp = extra_ref;
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				socket_lock(so, 0);
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Drop the extra references; the last drop tears the socket down. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(PROC_NULL, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2706
2707 void
unp_dispose(struct mbuf * m)2708 unp_dispose(struct mbuf *m)
2709 {
2710 if (m) {
2711 unp_scan(m, unp_discard, NULL);
2712 }
2713 }
2714
2715 /*
2716 * Returns: 0 Success
2717 */
static int
unp_listen(struct unpcb *unp, proc_t p)
{
	/* Snapshot the listener's credentials into the PCB so later
	 * peer-credential queries see the creds at listen() time. */
	kauth_cred_t safecred __single = kauth_cred_proc_ref(p);
	cru2x(safecred, &unp->unp_peercred);
	kauth_cred_unref(&safecred);
	/* Flag that unp_peercred is now valid. */
	unp->unp_flags |= UNP_HAVEPCCACHED;
	return 0;
}
2727
2728 static void
unp_scan(struct mbuf * m0,void (* op)(struct fileglob *,void * arg),void * arg)2729 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2730 {
2731 struct mbuf *m;
2732 struct fileglob **rp;
2733 struct cmsghdr *cm;
2734 int i;
2735 int qfds;
2736
2737 while (m0) {
2738 for (m = m0; m; m = m->m_next) {
2739 if (m->m_type == MT_CONTROL &&
2740 (size_t)m->m_len >= sizeof(*cm)) {
2741 cm = mtod(m, struct cmsghdr *);
2742 if (cm->cmsg_level != SOL_SOCKET ||
2743 cm->cmsg_type != SCM_RIGHTS) {
2744 continue;
2745 }
2746 qfds = (cm->cmsg_len - sizeof(*cm)) /
2747 sizeof(int);
2748 rp = (struct fileglob **)(cm + 1);
2749 for (i = 0; i < qfds; i++) {
2750 (*op)(*rp++, arg);
2751 }
2752 break; /* XXX, but saves time */
2753 }
2754 }
2755 m0 = m0->m_act;
2756 }
2757 }
2758
/*
 * unp_mark
 *
 * Description: unp_scan() callback for the GC mark phase: atomically set
 *		FMARK | FDEFER on the fileglob and bump unp_defer so the
 *		mark loop makes another pass.  A fileglob already marked
 *		is left alone.
 */
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
		        os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	unp_defer++;
}
2773
/*
 * unp_discard
 *
 * Description: Drop one in-transit reference on a fileglob: dequeue it
 *		from the message queue if this was the last message
 *		reference, adjust the global rights accounting, and drop
 *		the fileglob reference itself.
 */
static void
unp_discard(struct fileglob *fg, void *p)
{
	if (p == NULL) {
		p = current_proc();             /* XXX */
	}
	(void) OSAddAtomic(1, &unp_disposed);
	if (fg_removeuipc_mark(fg)) {
		VERIFY(fg->fg_lflags & FG_RMMSGQ);
		fg_removeuipc(fg);
	}
	(void) OSAddAtomic(-1, &unp_rights);

	(void) fg_drop(p, fg);
}
2789
/*
 * unp_lock
 *
 * Description: pr_lock handler for UNIX-domain sockets: take the PCB's
 *		mutex, optionally bump the socket use count, and record
 *		the caller's return address for lock debugging.
 */
int
unp_lock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved __single;
	/* Capture the caller's return address if none was supplied. */
	if (lr == 0) {
		lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
	} else {
		lr_saved = lr;
	}

	if (so->so_pcb) {
		lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
	} else {
		panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
		    so, lr_saved, so->so_usecount);
	}

	if (so->so_usecount < 0) {
		panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
		    so, so->so_pcb, lr_saved, so->so_usecount);
	}

	if (refcount) {
		VERIFY(so->so_usecount > 0);
		so->so_usecount++;
	}
	/* Record this lock site in the ring of recent lockers. */
	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
	return 0;
}
2820
/*
 * unp_unlock
 *
 * Description: pr_unlock handler for UNIX-domain sockets: optionally drop
 *		a use-count reference, record the unlock site, and release
 *		the PCB mutex.  When the last reference goes away and the
 *		PCB is marked for clearing, free the socket, its address,
 *		and the PCB itself, then kick the GC thread call.
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved __single;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp __single = sotounpcb(so);

	/* Capture the caller's return address if none was supplied. */
	if (lr == 0) {
		lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* Record this unlock site in the ring of recent unlockers. */
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	/* Last reference on a dying socket: tear everything down. */
	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		/* The mutex lives inside the PCB; unlock before destroying. */
		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2868
2869 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2870 unp_getlock(struct socket *so, __unused int flags)
2871 {
2872 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2873
2874
2875 if (so->so_pcb) {
2876 if (so->so_usecount < 0) {
2877 panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2878 }
2879 return &unp->unp_mtx;
2880 } else {
2881 panic("unp_getlock: so=%p NULL so_pcb", so);
2882 return so->so_proto->pr_domain->dom_mtx;
2883 }
2884 }
2885