xref: /xnu-11215.1.10/bsd/kern/uipc_syscalls.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * sendfile(2) and related extensions:
33  * Copyright (c) 1998, David Greenman. All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64  */
65 /*
66  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67  * support for mandatory and extensible security protections.  This notice
68  * is included in support of clause 2.2 (b) of the Apple Public License,
69  * Version 2.0.
70  */
71 
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 #include <sys/types.h>
96 
97 #include <security/audit/audit.h>
98 
99 #include <sys/kdebug.h>
100 #include <sys/sysproto.h>
101 #include <netinet/in.h>
102 #include <net/route.h>
103 #include <netinet/in_pcb.h>
104 
105 #include <os/log.h>
106 #include <os/ptrtools.h>
107 
108 #include <os/log.h>
109 
110 #if CONFIG_MACF_SOCKET_SUBSET
111 #include <security/mac_framework.h>
112 #endif /* MAC_SOCKET_SUBSET */
113 
114 #include <net/sockaddr_utils.h>
115 
/* Defined outside this file; used below to obtain a process name for logging. */
extern char *proc_name_address(void *p);

/* Shorthands so that fp->f_flag / fp->f_ops reach through fp_glob. */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops

/* kdebug codes (class DBG_NETSOCK): socket layer entry/exit */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)

/* kdebug codes (class DBG_NETSOCK): send paths */
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

/* kdebug codes (class DBG_NETSOCK): receive paths */
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))

/* kdebug codes (class DBG_NETSOCK): sendfile and its sub-phases */
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))

/* kdebug codes (class DBG_NETSOCK): batched message variants */
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))
137 
138 /* Forward declarations for referenced types */
139 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
140 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
141 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
142 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
143 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
144 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
145 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
146 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
147 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
148 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
149 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
150 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
151 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
152 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
153 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
154 
155 static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
156     int, int32_ref_t );
157 static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
158     int32_ref_t);
159 static int connectit(socket_ref_t, sockaddr_ref_t);
160 static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
161     size_t, boolean_t);
162 static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
163     user_addr_t, size_t, boolean_t);
164 #if SENDFILE
165 static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
166     boolean_t);
167 #endif /* SENDFILE */
168 static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
169 static int connectitx(socket_ref_t, sockaddr_ref_t,
170     sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
171     sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
172 static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
173     int_ref_t);
174 static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);
175 
176 static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
177     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
178 static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
179     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);
180 
181 static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
182 static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
183 static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
184 static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
185     socklen_ref_t, int_ref_t, socket_ref_t);
186 
187 SYSCTL_DECL(_kern_ipc);
188 
189 #define SO_MAX_MSG_X_DEFAULT 256
190 
191 static u_int somaxsendmsgx = SO_MAX_MSG_X_DEFAULT;
192 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
193     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
194 
195 static u_int somaxrecvmsgx = SO_MAX_MSG_X_DEFAULT;
196 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
197     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
198 
199 static u_int missingpktinfo = 0;
200 SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
201     CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");
202 
203 static int do_recvmsg_x_donttrunc = 0;
204 SYSCTL_INT(_kern_ipc, OID_AUTO, do_recvmsg_x_donttrunc,
205     CTLFLAG_RW | CTLFLAG_LOCKED, &do_recvmsg_x_donttrunc, 0, "");
206 
#if DEBUG || DEVELOPMENT
/* "debug" knob under _kern_ipc: DBG_PRINTF() only logs when it is non-zero. */
static int uipc_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &uipc_debug, 0, "");

/* DEBUG/DEVELOPMENT kernels pass the value through unchanged. */
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
/*
 * Wrapped in do { } while (0) so that the macro behaves as one statement:
 * a bare "if (...) { }" expansion would capture a following "else" (or
 * fail to compile) when used as the body of an un-braced if/else.  This
 * also matches the shape of the non-debug variant below.
 */
#define DBG_PRINTF(...) do {                            \
	if (uipc_debug != 0) {                          \
	        os_log(OS_LOG_DEFAULT, __VA_ARGS__);    \
	}                                               \
} while (0)
#else
/* Other kernels route the value through VM_KERNEL_ADDRPERM(). */
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
/* Compiled out: expands to an empty statement. */
#define DBG_PRINTF(...) do { } while (0)
#endif
220 
221 
222 /*
223  * Values for sendmsg_x_mode
224  * 0: default
225  * 1: sendit loop one at a time
226  * 2: old implementation
227  */
228 static u_int sendmsg_x_mode = 0;
229 SYSCTL_UINT(_kern_ipc, OID_AUTO, sendmsg_x_mode,
230     CTLFLAG_RW | CTLFLAG_LOCKED, &sendmsg_x_mode, 0, "");
231 
232 /*
233  * System call interface to the socket abstraction.
234  */
235 
236 extern const struct fileops socketops;
237 
238 /*
239  * Returns:	0			Success
240  *		EACCES			Mandatory Access Control failure
241  *	falloc:ENFILE
242  *	falloc:EMFILE
243  *	falloc:ENOMEM
244  *	socreate:EAFNOSUPPORT
245  *	socreate:EPROTOTYPE
246  *	socreate:EPROTONOSUPPORT
247  *	socreate:ENOBUFS
248  *	socreate:ENOMEM
249  *	socreate:???			[other protocol families, IPSEC]
250  */
251 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)252 socket(proc_ref_t p,
253     struct socket_args *uap,
254     int32_ref_t retval)
255 {
256 	return socket_common(p, uap->domain, uap->type, uap->protocol,
257 	           proc_selfpid(), retval, 0);
258 }
259 
260 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)261 socket_delegate(proc_ref_t p,
262     struct socket_delegate_args *uap,
263     int32_ref_t retval)
264 {
265 	return socket_common(p, uap->domain, uap->type, uap->protocol,
266 	           uap->epid, retval, 1);
267 }
268 
269 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)270 socket_common(proc_ref_t p,
271     int domain,
272     int type,
273     int protocol,
274     pid_t epid,
275     int32_ref_t retval,
276     int delegate)
277 {
278 	socket_ref_t so;
279 	fileproc_ref_t  fp;
280 	int fd, error;
281 
282 	AUDIT_ARG(socket, domain, type, protocol);
283 #if CONFIG_MACF_SOCKET_SUBSET
284 	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
285 	    type, protocol)) != 0) {
286 		return error;
287 	}
288 #endif /* MAC_SOCKET_SUBSET */
289 
290 	if (delegate) {
291 		error = priv_check_cred(kauth_cred_get(),
292 		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
293 		if (error) {
294 			return EACCES;
295 		}
296 	}
297 
298 	error = falloc(p, &fp, &fd);
299 	if (error) {
300 		return error;
301 	}
302 	fp->f_flag = FREAD | FWRITE;
303 	fp->f_ops = &socketops;
304 
305 	if (delegate) {
306 		error = socreate_delegate(domain, &so, type, protocol, epid);
307 	} else {
308 		error = socreate(domain, &so, type, protocol);
309 	}
310 
311 	if (error) {
312 		fp_free(p, fd, fp);
313 	} else {
314 		fp_set_data(fp, so);
315 
316 		proc_fdlock(p);
317 		procfdtbl_releasefd(p, fd, NULL);
318 
319 		if (ENTR_SHOULDTRACE) {
320 			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
321 			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
322 		}
323 		fp_drop(p, fd, fp, 1);
324 		proc_fdunlock(p);
325 
326 		*retval = fd;
327 	}
328 	return error;
329 }
330 
331 /*
332  * Returns:	0			Success
333  *		EDESTADDRREQ		Destination address required
334  *		EBADF			Bad file descriptor
335  *		EACCES			Mandatory Access Control failure
336  *	file_socket:ENOTSOCK
337  *	file_socket:EBADF
338  *	getsockaddr:ENAMETOOLONG	Filename too long
339  *	getsockaddr:EINVAL		Invalid argument
340  *	getsockaddr:ENOMEM		Not enough space
341  *	getsockaddr:EFAULT		Bad address
342  *	sobindlock:???
343  */
344 /* ARGSUSED */
345 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)346 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
347 {
348 	struct sockaddr_storage ss;
349 	sockaddr_ref_t  sa = NULL;
350 	socket_ref_t so;
351 	boolean_t want_free = TRUE;
352 	int error;
353 
354 	AUDIT_ARG(fd, uap->s);
355 	error = file_socket(uap->s, &so);
356 	if (error != 0) {
357 		return error;
358 	}
359 	if (so == NULL) {
360 		error = EBADF;
361 		goto out;
362 	}
363 	if (uap->name == USER_ADDR_NULL) {
364 		error = EDESTADDRREQ;
365 		goto out;
366 	}
367 	if (uap->namelen > sizeof(ss)) {
368 		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
369 	} else {
370 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
371 		if (error == 0) {
372 			sa = SA(&ss);
373 			want_free = FALSE;
374 		}
375 	}
376 	if (error != 0) {
377 		goto out;
378 	}
379 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
380 #if CONFIG_MACF_SOCKET_SUBSET
381 	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
382 	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
383 		error = sobindlock(so, sa, 1);  /* will lock socket */
384 	}
385 #else
386 	error = sobindlock(so, sa, 1);          /* will lock socket */
387 #endif /* MAC_SOCKET_SUBSET */
388 	if (want_free) {
389 		free_sockaddr(sa);
390 	}
391 out:
392 	file_drop(uap->s);
393 	return error;
394 }
395 
396 /*
397  * Returns:	0			Success
398  *		EBADF
399  *		EACCES			Mandatory Access Control failure
400  *	file_socket:ENOTSOCK
401  *	file_socket:EBADF
402  *	solisten:EINVAL
403  *	solisten:EOPNOTSUPP
404  *	solisten:???
405  */
406 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)407 listen(__unused proc_ref_t p, struct listen_args *uap,
408     __unused int32_ref_t retval)
409 {
410 	int error;
411 	socket_ref_t so;
412 
413 	AUDIT_ARG(fd, uap->s);
414 	error = file_socket(uap->s, &so);
415 	if (error) {
416 		return error;
417 	}
418 	if (so != NULL)
419 #if CONFIG_MACF_SOCKET_SUBSET
420 	{
421 		error = mac_socket_check_listen(kauth_cred_get(), so);
422 		if (error == 0) {
423 			error = solisten(so, uap->backlog);
424 		}
425 	}
426 #else
427 	{ error = solisten(so, uap->backlog);}
428 #endif /* MAC_SOCKET_SUBSET */
429 	else {
430 		error = EBADF;
431 	}
432 
433 	file_drop(uap->s);
434 	return error;
435 }
436 
437 /*
438  * Returns:	fp_get_ftype:EBADF	Bad file descriptor
439  *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
440  *		:EFAULT			Bad address on copyin/copyout
441  *		:EBADF			Bad file descriptor
442  *		:EOPNOTSUPP		Operation not supported on socket
443  *		:EINVAL			Invalid argument
444  *		:EWOULDBLOCK		Operation would block
445  *		:ECONNABORTED		Connection aborted
446  *		:EINTR			Interrupted function
447  *		:EACCES			Mandatory Access Control failure
448  *		falloc:ENFILE		Too many files open in system
449  *		falloc:EMFILE		Too many open files
450  *		falloc:ENOMEM		Not enough space
451  *		0			Success
452  */
453 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)454 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
455     int32_ref_t retval)
456 {
457 	fileproc_ref_t  fp;
458 	sockaddr_ref_t  sa = NULL;
459 	socklen_t namelen;
460 	int error;
461 	socket_ref_t  head;
462 	socket_ref_t so = NULL;
463 	lck_mtx_t *mutex_held;
464 	int fd = uap->s;
465 	int newfd;
466 	unsigned int fflag;
467 	int dosocklock = 0;
468 
469 	*retval = -1;
470 
471 	AUDIT_ARG(fd, uap->s);
472 
473 	if (uap->name) {
474 		error = copyin(uap->anamelen, (caddr_t)&namelen,
475 		    sizeof(socklen_t));
476 		if (error) {
477 			return error;
478 		}
479 	}
480 	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
481 	if (error) {
482 		return error;
483 	}
484 	head = (struct socket *)fp_get_data(fp);
485 
486 #if CONFIG_MACF_SOCKET_SUBSET
487 	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
488 		goto out;
489 	}
490 #endif /* MAC_SOCKET_SUBSET */
491 
492 	socket_lock(head, 1);
493 
494 	if (head->so_proto->pr_getlock != NULL) {
495 		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
496 		dosocklock = 1;
497 	} else {
498 		mutex_held = head->so_proto->pr_domain->dom_mtx;
499 		dosocklock = 0;
500 	}
501 
502 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
503 		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
504 			error = EOPNOTSUPP;
505 		} else {
506 			/* POSIX: The socket is not accepting connections */
507 			error = EINVAL;
508 		}
509 		socket_unlock(head, 1);
510 		os_log(OS_LOG_DEFAULT, "%s:%d accept() SO_ACCEPTCONN %d: msleep", proc_name_address(p), proc_selfpid(), error);
511 		goto out;
512 	}
513 check_again:
514 	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
515 		socket_unlock(head, 1);
516 		error = EWOULDBLOCK;
517 		os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: non-blocking  empty queue", proc_name_address(p), proc_selfpid(), error);
518 		goto out;
519 	}
520 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
521 		if (head->so_state & SS_CANTRCVMORE) {
522 			head->so_error = ECONNABORTED;
523 			break;
524 		}
525 		if (head->so_usecount < 1) {
526 			panic("accept: head=%p refcount=%d", head,
527 			    head->so_usecount);
528 		}
529 		error = msleep((caddr_t)&head->so_timeo, mutex_held,
530 		    PSOCK | PCATCH, "accept", 0);
531 		if (head->so_usecount < 1) {
532 			panic("accept: 2 head=%p refcount=%d", head,
533 			    head->so_usecount);
534 		}
535 		if ((head->so_state & SS_DRAINING)) {
536 			error = ECONNABORTED;
537 		}
538 		if (error) {
539 			os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: msleep", proc_name_address(p), proc_selfpid(), error);
540 			socket_unlock(head, 1);
541 			goto out;
542 		}
543 	}
544 	if (head->so_error) {
545 		error = head->so_error;
546 		head->so_error = 0;
547 		socket_unlock(head, 1);
548 		os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: head->so_error", proc_name_address(p), proc_selfpid(), error);
549 		goto out;
550 	}
551 
552 	/*
553 	 * At this point we know that there is at least one connection
554 	 * ready to be accepted. Remove it from the queue prior to
555 	 * allocating the file descriptor for it since falloc() may
556 	 * block allowing another process to accept the connection
557 	 * instead.
558 	 */
559 	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
560 
561 	so_acquire_accept_list(head, NULL);
562 	if (TAILQ_EMPTY(&head->so_comp)) {
563 		so_release_accept_list(head);
564 		goto check_again;
565 	}
566 
567 	so = TAILQ_FIRST(&head->so_comp);
568 	TAILQ_REMOVE(&head->so_comp, so, so_list);
569 	/*
570 	 * Acquire the lock of the new connection
571 	 * as we may be in the process of receiving
572 	 * a packet that may change its so_state
573 	 * (e.g.: a TCP FIN).
574 	 */
575 	if (dosocklock) {
576 		socket_lock(so, 0);
577 	}
578 	so->so_head = NULL;
579 	so->so_state &= ~SS_COMP;
580 	if (dosocklock) {
581 		socket_unlock(so, 0);
582 	}
583 	head->so_qlen--;
584 	so_release_accept_list(head);
585 
586 	/* unlock head to avoid deadlock with select, keep a ref on head */
587 	socket_unlock(head, 0);
588 
589 #if CONFIG_MACF_SOCKET_SUBSET
590 	/*
591 	 * Pass the pre-accepted socket to the MAC framework. This is
592 	 * cheaper than allocating a file descriptor for the socket,
593 	 * calling the protocol accept callback, and possibly freeing
594 	 * the file descriptor should the MAC check fails.
595 	 */
596 	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
597 		socket_lock(so, 1);
598 		so->so_state &= ~SS_NOFDREF;
599 		socket_unlock(so, 1);
600 		soclose(so);
601 		/* Drop reference on listening socket */
602 		sodereference(head);
603 		goto out;
604 	}
605 #endif /* MAC_SOCKET_SUBSET */
606 
607 	/*
608 	 * Pass the pre-accepted socket to any interested socket filter(s).
609 	 * Upon failure, the socket would have been closed by the callee.
610 	 */
611 	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
612 		/* Drop reference on listening socket */
613 		sodereference(head);
614 		/* Propagate socket filter's error code to the caller */
615 		os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: soacceptfilter", proc_name_address(p), proc_selfpid(), error);
616 		goto out;
617 	}
618 
619 	fflag = fp->f_flag;
620 	error = falloc(p, &fp, &newfd);
621 	if (error) {
622 		/*
623 		 * Probably ran out of file descriptors.
624 		 *
625 		 * <rdar://problem/8554930>
626 		 * Don't put this back on the socket like we used to, that
627 		 * just causes the client to spin. Drop the socket.
628 		 */
629 		socket_lock(so, 1);
630 		so->so_state &= ~SS_NOFDREF;
631 		socket_unlock(so, 1);
632 		soclose(so);
633 		sodereference(head);
634 		os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: falloc", proc_name_address(p), proc_selfpid(), error);
635 		goto out;
636 	}
637 	*retval = newfd;
638 	fp->f_flag = fflag;
639 	fp->f_ops = &socketops;
640 	fp_set_data(fp, so);
641 
642 	socket_lock(head, 0);
643 	if (dosocklock) {
644 		socket_lock(so, 1);
645 	}
646 
647 	/* Sync socket non-blocking/async state with file flags */
648 	if (fp->f_flag & FNONBLOCK) {
649 		so->so_state |= SS_NBIO;
650 	} else {
651 		so->so_state &= ~SS_NBIO;
652 	}
653 
654 	if (fp->f_flag & FASYNC) {
655 		so->so_state |= SS_ASYNC;
656 		so->so_rcv.sb_flags |= SB_ASYNC;
657 		so->so_snd.sb_flags |= SB_ASYNC;
658 	} else {
659 		so->so_state &= ~SS_ASYNC;
660 		so->so_rcv.sb_flags &= ~SB_ASYNC;
661 		so->so_snd.sb_flags &= ~SB_ASYNC;
662 	}
663 
664 	(void) soacceptlock(so, &sa, 0);
665 	socket_unlock(head, 1);
666 	if (sa == NULL) {
667 		namelen = 0;
668 		if (uap->name) {
669 			goto gotnoname;
670 		}
671 		error = 0;
672 		goto releasefd;
673 	}
674 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
675 
676 	if (uap->name) {
677 		socklen_t       sa_len;
678 
679 		/* save sa_len before it is destroyed */
680 		sa_len = sa->sa_len;
681 		namelen = MIN(namelen, sa_len);
682 		error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->name, namelen);
683 		if (!error) {
684 			/* return the actual, untruncated address length */
685 			namelen = sa_len;
686 		}
687 gotnoname:
688 		error = copyout((caddr_t)&namelen, uap->anamelen,
689 		    sizeof(socklen_t));
690 		if (__improbable(error != 0)) {
691 			os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: falloc", proc_name_address(p), proc_selfpid(), error);
692 		}
693 	}
694 	free_sockaddr(sa);
695 
696 releasefd:
697 	/*
698 	 * If the socket has been marked as inactive by sosetdefunct(),
699 	 * disallow further operations on it.
700 	 */
701 	if (so->so_flags & SOF_DEFUNCT) {
702 		sodefunct(current_proc(), so,
703 		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
704 	}
705 
706 	if (dosocklock) {
707 		socket_unlock(so, 1);
708 	}
709 
710 	proc_fdlock(p);
711 	procfdtbl_releasefd(p, newfd, NULL);
712 	fp_drop(p, newfd, fp, 1);
713 	proc_fdunlock(p);
714 
715 out:
716 	if (error == 0 && ENTR_SHOULDTRACE) {
717 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
718 		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
719 	}
720 
721 	file_drop(fd);
722 	return error;
723 }
724 
725 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)726 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
727 {
728 	__pthread_testcancel(1);
729 	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
730 	           retval);
731 }
732 
733 /*
734  * Returns:	0			Success
735  *		EBADF			Bad file descriptor
736  *		EALREADY		Connection already in progress
737  *		EINPROGRESS		Operation in progress
738  *		ECONNABORTED		Connection aborted
739  *		EINTR			Interrupted function
740  *		EACCES			Mandatory Access Control failure
741  *	file_socket:ENOTSOCK
742  *	file_socket:EBADF
743  *	getsockaddr:ENAMETOOLONG	Filename too long
744  *	getsockaddr:EINVAL		Invalid argument
745  *	getsockaddr:ENOMEM		Not enough space
746  *	getsockaddr:EFAULT		Bad address
747  *	soconnectlock:EOPNOTSUPP
748  *	soconnectlock:EISCONN
749  *	soconnectlock:???		[depends on protocol, filters]
750  *	msleep:EINTR
751  *
752  * Imputed:	so_error		error may be set from so_error, which
753  *					may have been set by soconnectlock.
754  */
755 /* ARGSUSED */
756 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)757 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
758 {
759 	__pthread_testcancel(1);
760 	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
761 	           retval);
762 }
763 
764 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)765 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
766 {
767 #pragma unused(p, retval)
768 	socket_ref_t so;
769 	struct sockaddr_storage ss;
770 	sockaddr_ref_t  sa = NULL;
771 	int error;
772 	int fd = uap->s;
773 	boolean_t dgram;
774 
775 	AUDIT_ARG(fd, uap->s);
776 	error = file_socket(fd, &so);
777 	if (error != 0) {
778 		return error;
779 	}
780 	if (so == NULL) {
781 		error = EBADF;
782 		goto out;
783 	}
784 
785 	/*
786 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
787 	 * if this is a datagram socket; translate for other types.
788 	 */
789 	dgram = (so->so_type == SOCK_DGRAM);
790 
791 	/* Get socket address now before we obtain socket lock */
792 	if (uap->namelen > sizeof(ss)) {
793 		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
794 	} else {
795 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
796 		if (error == 0) {
797 			sa = SA(&ss);
798 		}
799 	}
800 	if (error != 0) {
801 		goto out;
802 	}
803 
804 	error = connectit(so, sa);
805 
806 	if (sa != NULL && sa != SA(&ss)) {
807 		free_sockaddr(sa);
808 	}
809 	if (error == ERESTART) {
810 		error = EINTR;
811 	}
812 out:
813 	file_drop(fd);
814 	return error;
815 }
816 
817 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)818 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
819 {
820 #pragma unused(p, retval)
821 	struct sockaddr_storage ss, sd;
822 	sockaddr_ref_t  src = NULL, dst = NULL;
823 	socket_ref_t so;
824 	int error, error1, fd = uap->socket;
825 	boolean_t dgram;
826 	sae_connid_t cid = SAE_CONNID_ANY;
827 	struct user32_sa_endpoints ep32;
828 	struct user64_sa_endpoints ep64;
829 	struct user_sa_endpoints ep;
830 	user_ssize_t bytes_written = 0;
831 	struct user_iovec *iovp;
832 	uio_t auio = NULL;
833 
834 	AUDIT_ARG(fd, uap->socket);
835 	error = file_socket(fd, &so);
836 	if (error != 0) {
837 		return error;
838 	}
839 	if (so == NULL) {
840 		error = EBADF;
841 		goto out;
842 	}
843 
844 	if (uap->endpoints == USER_ADDR_NULL) {
845 		error = EINVAL;
846 		goto out;
847 	}
848 
849 	if (IS_64BIT_PROCESS(p)) {
850 		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
851 		if (error != 0) {
852 			goto out;
853 		}
854 
855 		ep.sae_srcif = ep64.sae_srcif;
856 		ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
857 		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
858 		ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
859 		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
860 	} else {
861 		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
862 		if (error != 0) {
863 			goto out;
864 		}
865 
866 		ep.sae_srcif = ep32.sae_srcif;
867 		ep.sae_srcaddr = ep32.sae_srcaddr;
868 		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
869 		ep.sae_dstaddr = ep32.sae_dstaddr;
870 		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
871 	}
872 
873 	/*
874 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
875 	 * if this is a datagram socket; translate for other types.
876 	 */
877 	dgram = (so->so_type == SOCK_DGRAM);
878 
879 	/* Get socket address now before we obtain socket lock */
880 	if (ep.sae_srcaddr != USER_ADDR_NULL) {
881 		if (ep.sae_srcaddrlen > sizeof(ss)) {
882 			error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
883 		} else {
884 			error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
885 			if (error == 0) {
886 				src = SA(&ss);
887 			}
888 		}
889 
890 		if (error) {
891 			goto out;
892 		}
893 	}
894 
895 	if (ep.sae_dstaddr == USER_ADDR_NULL) {
896 		error = EINVAL;
897 		goto out;
898 	}
899 
900 	/* Get socket address now before we obtain socket lock */
901 	if (ep.sae_dstaddrlen > sizeof(sd)) {
902 		error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
903 	} else {
904 		error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
905 		if (error == 0) {
906 			dst = SA(&sd);
907 		}
908 	}
909 
910 	if (error) {
911 		goto out;
912 	}
913 
914 	VERIFY(dst != NULL);
915 
916 	if (uap->iov != USER_ADDR_NULL) {
917 		/* Verify range before calling uio_create() */
918 		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
919 			error = EINVAL;
920 			goto out;
921 		}
922 
923 		if (uap->len == USER_ADDR_NULL) {
924 			error = EINVAL;
925 			goto out;
926 		}
927 
928 		/* allocate a uio to hold the number of iovecs passed */
929 		auio = uio_create(uap->iovcnt, 0,
930 		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
931 		    UIO_WRITE);
932 
933 		if (auio == NULL) {
934 			error = ENOMEM;
935 			goto out;
936 		}
937 
938 		/*
939 		 * get location of iovecs within the uio.
940 		 * then copyin the iovecs from user space.
941 		 */
942 		iovp = uio_iovsaddr_user(auio);
943 		if (iovp == NULL) {
944 			error = ENOMEM;
945 			goto out;
946 		}
947 		error = copyin_user_iovec_array(uap->iov,
948 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
949 		    uap->iovcnt, iovp);
950 		if (error != 0) {
951 			goto out;
952 		}
953 
954 		/* finish setup of uio_t */
955 		error = uio_calculateresid_user(auio);
956 		if (error != 0) {
957 			goto out;
958 		}
959 	}
960 
961 	error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
962 	    &cid, auio, uap->flags, &bytes_written);
963 	if (error == ERESTART) {
964 		error = EINTR;
965 	}
966 
967 	if (uap->len != USER_ADDR_NULL) {
968 		if (IS_64BIT_PROCESS(p)) {
969 			error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
970 		} else {
971 			error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
972 		}
973 		/* give precedence to connectitx errors */
974 		if ((error1 != 0) && (error == 0)) {
975 			error = error1;
976 		}
977 	}
978 
979 	if (uap->connid != USER_ADDR_NULL) {
980 		error1 = copyout(&cid, uap->connid, sizeof(cid));
981 		/* give precedence to connectitx errors */
982 		if ((error1 != 0) && (error == 0)) {
983 			error = error1;
984 		}
985 	}
986 out:
987 	file_drop(fd);
988 	if (auio != NULL) {
989 		uio_free(auio);
990 	}
991 	if (src != NULL && src != SA(&ss)) {
992 		free_sockaddr(src);
993 	}
994 	if (dst != NULL && dst != SA(&sd)) {
995 		free_sockaddr(dst);
996 	}
997 	return error;
998 }
999 
1000 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)1001 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
1002 {
1003 	/*
1004 	 * Due to similiarity with a POSIX interface, define as
1005 	 * an unofficial cancellation point.
1006 	 */
1007 	__pthread_testcancel(1);
1008 	return connectx_nocancel(p, uap, retval);
1009 }
1010 
1011 static int
connectit(struct socket * so,sockaddr_ref_t sa)1012 connectit(struct socket *so, sockaddr_ref_t sa)
1013 {
1014 	int error;
1015 
1016 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
1017 #if CONFIG_MACF_SOCKET_SUBSET
1018 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
1019 		return error;
1020 	}
1021 #endif /* MAC_SOCKET_SUBSET */
1022 
1023 	socket_lock(so, 1);
1024 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1025 		error = EALREADY;
1026 		goto out;
1027 	}
1028 	error = soconnectlock(so, sa, 0);
1029 	if (error != 0) {
1030 		goto out;
1031 	}
1032 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1033 		error = EINPROGRESS;
1034 		goto out;
1035 	}
1036 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1037 		lck_mtx_t *mutex_held;
1038 
1039 		if (so->so_proto->pr_getlock != NULL) {
1040 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1041 		} else {
1042 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1043 		}
1044 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1045 		    PSOCK | PCATCH, __func__, 0);
1046 		if (so->so_state & SS_DRAINING) {
1047 			error = ECONNABORTED;
1048 		}
1049 		if (error != 0) {
1050 			break;
1051 		}
1052 	}
1053 	if (error == 0) {
1054 		error = so->so_error;
1055 		so->so_error = 0;
1056 	}
1057 out:
1058 	socket_unlock(so, 1);
1059 	return error;
1060 }
1061 
1062 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1063 connectitx(struct socket *so, sockaddr_ref_t src,
1064     sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1065     sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1066     user_ssize_t *bytes_written)
1067 {
1068 	int error;
1069 
1070 	VERIFY(dst != NULL);
1071 
1072 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1073 #if CONFIG_MACF_SOCKET_SUBSET
1074 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1075 		return error;
1076 	}
1077 
1078 	if (auio != NULL) {
1079 		if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1080 			return error;
1081 		}
1082 	}
1083 #endif /* MAC_SOCKET_SUBSET */
1084 
1085 	socket_lock(so, 1);
1086 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1087 		error = EALREADY;
1088 		goto out;
1089 	}
1090 
1091 	error = soconnectxlocked(so, src, dst, p, ifscope,
1092 	    aid, pcid, flags, NULL, 0, auio, bytes_written);
1093 	if (error != 0) {
1094 		goto out;
1095 	}
1096 	/*
1097 	 * If, after the call to soconnectxlocked the flag is still set (in case
1098 	 * data has been queued and the connect() has actually been triggered,
1099 	 * it will have been unset by the transport), we exit immediately. There
1100 	 * is no reason to wait on any event.
1101 	 */
1102 	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1103 		error = 0;
1104 		goto out;
1105 	}
1106 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1107 		error = EINPROGRESS;
1108 		goto out;
1109 	}
1110 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1111 		lck_mtx_t *mutex_held;
1112 
1113 		if (so->so_proto->pr_getlock != NULL) {
1114 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1115 		} else {
1116 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1117 		}
1118 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1119 		    PSOCK | PCATCH, __func__, 0);
1120 		if (so->so_state & SS_DRAINING) {
1121 			error = ECONNABORTED;
1122 		}
1123 		if (error != 0) {
1124 			break;
1125 		}
1126 	}
1127 	if (error == 0) {
1128 		error = so->so_error;
1129 		so->so_error = 0;
1130 	}
1131 out:
1132 	socket_unlock(so, 1);
1133 	return error;
1134 }
1135 
1136 int
peeloff(proc_ref_t p,struct peeloff_args * uap,int * retval)1137 peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
1138 {
1139 #pragma unused(p, uap, retval)
1140 	/*
1141 	 * Due to similiarity with a POSIX interface, define as
1142 	 * an unofficial cancellation point.
1143 	 */
1144 	__pthread_testcancel(1);
1145 	return 0;
1146 }
1147 
1148 int
disconnectx(proc_ref_t p,struct disconnectx_args * uap,int * retval)1149 disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1150 {
1151 	/*
1152 	 * Due to similiarity with a POSIX interface, define as
1153 	 * an unofficial cancellation point.
1154 	 */
1155 	__pthread_testcancel(1);
1156 	return disconnectx_nocancel(p, uap, retval);
1157 }
1158 
1159 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1160 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1161 {
1162 #pragma unused(p, retval)
1163 	socket_ref_t so;
1164 	int fd = uap->s;
1165 	int error;
1166 
1167 	error = file_socket(fd, &so);
1168 	if (error != 0) {
1169 		return error;
1170 	}
1171 	if (so == NULL) {
1172 		error = EBADF;
1173 		goto out;
1174 	}
1175 
1176 	error = sodisconnectx(so, uap->aid, uap->cid);
1177 out:
1178 	file_drop(fd);
1179 	return error;
1180 }
1181 
1182 /*
1183  * Returns:	0			Success
1184  *	socreate:EAFNOSUPPORT
1185  *	socreate:EPROTOTYPE
1186  *	socreate:EPROTONOSUPPORT
1187  *	socreate:ENOBUFS
1188  *	socreate:ENOMEM
1189  *	socreate:EISCONN
1190  *	socreate:???			[other protocol families, IPSEC]
1191  *	falloc:ENFILE
1192  *	falloc:EMFILE
1193  *	falloc:ENOMEM
1194  *	copyout:EFAULT
1195  *	soconnect2:EINVAL
1196  *	soconnect2:EPROTOTYPE
 *	soconnect2:???			[other protocol families]
1198  */
1199 int
socketpair(proc_ref_t p,struct socketpair_args * uap,__unused int32_ref_t retval)1200 socketpair(proc_ref_t p, struct socketpair_args *uap,
1201     __unused int32_ref_t retval)
1202 {
1203 	fileproc_ref_t  fp1, fp2;
1204 	socket_ref_t so1, so2;
1205 	int fd, error, sv[2];
1206 
1207 	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1208 	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1209 	if (error) {
1210 		return error;
1211 	}
1212 	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1213 	if (error) {
1214 		goto free1;
1215 	}
1216 
1217 	error = falloc(p, &fp1, &fd);
1218 	if (error) {
1219 		goto free2;
1220 	}
1221 	fp1->f_flag = FREAD | FWRITE;
1222 	fp1->f_ops = &socketops;
1223 	fp_set_data(fp1, so1);
1224 	sv[0] = fd;
1225 
1226 	error = falloc(p, &fp2, &fd);
1227 	if (error) {
1228 		goto free3;
1229 	}
1230 	fp2->f_flag = FREAD | FWRITE;
1231 	fp2->f_ops = &socketops;
1232 	fp_set_data(fp2, so2);
1233 	sv[1] = fd;
1234 
1235 	error = soconnect2(so1, so2);
1236 	if (error) {
1237 		goto free4;
1238 	}
1239 	if (uap->type == SOCK_DGRAM) {
1240 		/*
1241 		 * Datagram socket connection is asymmetric.
1242 		 */
1243 		error = soconnect2(so2, so1);
1244 		if (error) {
1245 			goto free4;
1246 		}
1247 	}
1248 
1249 	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1250 		goto free4;
1251 	}
1252 
1253 	proc_fdlock(p);
1254 	procfdtbl_releasefd(p, sv[0], NULL);
1255 	procfdtbl_releasefd(p, sv[1], NULL);
1256 	fp_drop(p, sv[0], fp1, 1);
1257 	fp_drop(p, sv[1], fp2, 1);
1258 	proc_fdunlock(p);
1259 
1260 	return 0;
1261 free4:
1262 	fp_free(p, sv[1], fp2);
1263 free3:
1264 	fp_free(p, sv[0], fp1);
1265 free2:
1266 	(void) soclose(so2);
1267 free1:
1268 	(void) soclose(so1);
1269 	return error;
1270 }
1271 
1272 /*
1273  * Returns:	0			Success
1274  *		EINVAL
1275  *		ENOBUFS
1276  *		EBADF
1277  *		EPIPE
1278  *		EACCES			Mandatory Access Control failure
1279  *	file_socket:ENOTSOCK
1280  *	file_socket:EBADF
1281  *	getsockaddr:ENAMETOOLONG	Filename too long
1282  *	getsockaddr:EINVAL		Invalid argument
1283  *	getsockaddr:ENOMEM		Not enough space
1284  *	getsockaddr:EFAULT		Bad address
1285  *	<pru_sosend>:EACCES[TCP]
1286  *	<pru_sosend>:EADDRINUSE[TCP]
1287  *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1288  *	<pru_sosend>:EAFNOSUPPORT[TCP]
1289  *	<pru_sosend>:EAGAIN[TCP]
1290  *	<pru_sosend>:EBADF
1291  *	<pru_sosend>:ECONNRESET[TCP]
1292  *	<pru_sosend>:EFAULT
1293  *	<pru_sosend>:EHOSTUNREACH[TCP]
1294  *	<pru_sosend>:EINTR
1295  *	<pru_sosend>:EINVAL
1296  *	<pru_sosend>:EISCONN[AF_INET]
1297  *	<pru_sosend>:EMSGSIZE[TCP]
1298  *	<pru_sosend>:ENETDOWN[TCP]
1299  *	<pru_sosend>:ENETUNREACH[TCP]
1300  *	<pru_sosend>:ENOBUFS
1301  *	<pru_sosend>:ENOMEM[TCP]
1302  *	<pru_sosend>:ENOTCONN[AF_INET]
1303  *	<pru_sosend>:EOPNOTSUPP
1304  *	<pru_sosend>:EPERM[TCP]
1305  *	<pru_sosend>:EPIPE
1306  *	<pru_sosend>:EWOULDBLOCK
1307  *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1308  *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1309  *	<pru_sosend>:???		[value from so_error]
1310  *	sockargs:???
1311  */
1312 static int
sendit(proc_ref_t p,struct socket * so,user_msghdr_ref_t mp,uio_t uiop,int flags,int32_ref_t retval)1313 sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
1314     int flags, int32_ref_t retval)
1315 {
1316 	mbuf_ref_t  control = NULL;
1317 	struct sockaddr_storage ss;
1318 	sockaddr_ref_t  to = NULL;
1319 	boolean_t want_free = TRUE;
1320 	int error;
1321 	user_ssize_t len;
1322 
1323 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1324 
1325 	if (mp->msg_name != USER_ADDR_NULL) {
1326 		if (mp->msg_namelen > sizeof(ss)) {
1327 			error = getsockaddr(so, &to, mp->msg_name,
1328 			    mp->msg_namelen, TRUE);
1329 		} else {
1330 			error = getsockaddr_s(so, &ss, mp->msg_name,
1331 			    mp->msg_namelen, TRUE);
1332 			if (error == 0) {
1333 				to = SA(&ss);
1334 				want_free = FALSE;
1335 			}
1336 		}
1337 		if (error != 0) {
1338 			goto out;
1339 		}
1340 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1341 	}
1342 	if (mp->msg_control != USER_ADDR_NULL) {
1343 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1344 			error = EINVAL;
1345 			goto bad;
1346 		}
1347 		error = sockargs(&control, mp->msg_control,
1348 		    mp->msg_controllen, MT_CONTROL);
1349 		if (error != 0) {
1350 			goto bad;
1351 		}
1352 	}
1353 
1354 #if CONFIG_MACF_SOCKET_SUBSET
1355 	/*
1356 	 * We check the state without holding the socket lock;
1357 	 * if a race condition occurs, it would simply result
1358 	 * in an extra call to the MAC check function.
1359 	 */
1360 	if (to != NULL &&
1361 	    !(so->so_state & SS_DEFUNCT) &&
1362 	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1363 		if (control != NULL) {
1364 			m_freem(control);
1365 		}
1366 
1367 		goto bad;
1368 	}
1369 #endif /* MAC_SOCKET_SUBSET */
1370 
1371 	len = uio_resid(uiop);
1372 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1373 	    control, flags);
1374 	if (error != 0) {
1375 		if (uio_resid(uiop) != len && (error == ERESTART ||
1376 		    error == EINTR || error == EWOULDBLOCK)) {
1377 			error = 0;
1378 		}
1379 		/* Generation of SIGPIPE can be controlled per socket */
1380 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1381 		    !(flags & MSG_NOSIGNAL)) {
1382 			psignal(p, SIGPIPE);
1383 		}
1384 	}
1385 	if (error == 0) {
1386 		*retval = (int)(len - uio_resid(uiop));
1387 	}
1388 bad:
1389 	if (want_free) {
1390 		free_sockaddr(to);
1391 	}
1392 out:
1393 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1394 
1395 	return error;
1396 }
1397 
1398 /*
1399  * Returns:	0			Success
1400  *		ENOMEM
1401  *	sendit:???			[see sendit definition in this file]
1402  *	write:???			[4056224: applicable for pipes]
1403  */
1404 int
sendto(proc_ref_t p,struct sendto_args * uap,int32_ref_t retval)1405 sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1406 {
1407 	__pthread_testcancel(1);
1408 	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1409 }
1410 
1411 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1412 sendto_nocancel(proc_ref_t p,
1413     struct sendto_nocancel_args *uap,
1414     int32_ref_t retval)
1415 {
1416 	struct user_msghdr msg;
1417 	int error;
1418 	uio_t auio = NULL;
1419 	socket_ref_t so;
1420 
1421 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1422 	AUDIT_ARG(fd, uap->s);
1423 
1424 	if (uap->flags & MSG_SKIPCFIL) {
1425 		error = EPERM;
1426 		goto done;
1427 	}
1428 
1429 	if (uap->len > LONG_MAX) {
1430 		error = EINVAL;
1431 		goto done;
1432 	}
1433 
1434 	auio = uio_create(1, 0,
1435 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1436 	    UIO_WRITE);
1437 	if (auio == NULL) {
1438 		error = ENOMEM;
1439 		goto done;
1440 	}
1441 	uio_addiov(auio, uap->buf, uap->len);
1442 
1443 	msg.msg_name = uap->to;
1444 	msg.msg_namelen = uap->tolen;
1445 	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1446 	msg.msg_iov = 0;
1447 	msg.msg_iovlen = 0;
1448 	msg.msg_control = 0;
1449 	msg.msg_flags = 0;
1450 
1451 	error = file_socket(uap->s, &so);
1452 	if (error) {
1453 		goto done;
1454 	}
1455 
1456 	if (so == NULL) {
1457 		error = EBADF;
1458 	} else {
1459 		error = sendit(p, so, &msg, auio, uap->flags, retval);
1460 	}
1461 
1462 	file_drop(uap->s);
1463 done:
1464 	if (auio != NULL) {
1465 		uio_free(auio);
1466 	}
1467 
1468 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1469 
1470 	return error;
1471 }
1472 
1473 /*
1474  * Returns:	0			Success
1475  *		ENOBUFS
1476  *	copyin:EFAULT
1477  *	sendit:???			[see sendit definition in this file]
1478  */
1479 int
sendmsg(proc_ref_t p,struct sendmsg_args * uap,int32_ref_t retval)1480 sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1481 {
1482 	__pthread_testcancel(1);
1483 	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1484 	           retval);
1485 }
1486 
1487 int
sendmsg_nocancel(proc_ref_t p,struct sendmsg_nocancel_args * uap,int32_ref_t retval)1488 sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1489     int32_ref_t retval)
1490 {
1491 	struct user32_msghdr msg32;
1492 	struct user64_msghdr msg64;
1493 	struct user_msghdr user_msg;
1494 	caddr_t msghdrp;
1495 	int     size_of_msghdr;
1496 	int error;
1497 	uio_t auio = NULL;
1498 	struct user_iovec *iovp;
1499 	socket_ref_t so;
1500 
1501 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1502 
1503 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1504 	AUDIT_ARG(fd, uap->s);
1505 
1506 	if (uap->flags & MSG_SKIPCFIL) {
1507 		error = EPERM;
1508 		goto done;
1509 	}
1510 
1511 	if (is_p_64bit_process) {
1512 		msghdrp = (caddr_t)&msg64;
1513 		size_of_msghdr = sizeof(msg64);
1514 	} else {
1515 		msghdrp = (caddr_t)&msg32;
1516 		size_of_msghdr = sizeof(msg32);
1517 	}
1518 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1519 	if (error) {
1520 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1521 		return error;
1522 	}
1523 
1524 	if (is_p_64bit_process) {
1525 		user_msg.msg_flags = msg64.msg_flags;
1526 		user_msg.msg_controllen = msg64.msg_controllen;
1527 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
1528 		user_msg.msg_iovlen = msg64.msg_iovlen;
1529 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1530 		user_msg.msg_namelen = msg64.msg_namelen;
1531 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
1532 	} else {
1533 		user_msg.msg_flags = msg32.msg_flags;
1534 		user_msg.msg_controllen = msg32.msg_controllen;
1535 		user_msg.msg_control = msg32.msg_control;
1536 		user_msg.msg_iovlen = msg32.msg_iovlen;
1537 		user_msg.msg_iov = msg32.msg_iov;
1538 		user_msg.msg_namelen = msg32.msg_namelen;
1539 		user_msg.msg_name = msg32.msg_name;
1540 	}
1541 
1542 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1543 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1544 		    0, 0, 0, 0);
1545 		return EMSGSIZE;
1546 	}
1547 
1548 	/* allocate a uio large enough to hold the number of iovecs passed */
1549 	auio = uio_create(user_msg.msg_iovlen, 0,
1550 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1551 	    UIO_WRITE);
1552 	if (auio == NULL) {
1553 		error = ENOBUFS;
1554 		goto done;
1555 	}
1556 
1557 	if (user_msg.msg_iovlen) {
1558 		/*
1559 		 * get location of iovecs within the uio.
1560 		 * then copyin the iovecs from user space.
1561 		 */
1562 		iovp = uio_iovsaddr_user(auio);
1563 		if (iovp == NULL) {
1564 			error = ENOBUFS;
1565 			goto done;
1566 		}
1567 		error = copyin_user_iovec_array(user_msg.msg_iov,
1568 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1569 		    user_msg.msg_iovlen, iovp);
1570 		if (error) {
1571 			goto done;
1572 		}
1573 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1574 
1575 		/* finish setup of uio_t */
1576 		error = uio_calculateresid_user(auio);
1577 		if (error) {
1578 			goto done;
1579 		}
1580 	} else {
1581 		user_msg.msg_iov = 0;
1582 	}
1583 
1584 	/* msg_flags is ignored for send */
1585 	user_msg.msg_flags = 0;
1586 
1587 	error = file_socket(uap->s, &so);
1588 	if (error) {
1589 		goto done;
1590 	}
1591 	if (so == NULL) {
1592 		error = EBADF;
1593 	} else {
1594 		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1595 	}
1596 	file_drop(uap->s);
1597 done:
1598 	if (auio != NULL) {
1599 		uio_free(auio);
1600 	}
1601 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1602 
1603 	return error;
1604 }
1605 
1606 static int
internalize_user_msg_x(struct user_msghdr * user_msg,uio_t * auiop,proc_ref_t p,void_ptr_t user_msghdr_x_src)1607 internalize_user_msg_x(struct user_msghdr *user_msg, uio_t *auiop, proc_ref_t p, void_ptr_t user_msghdr_x_src)
1608 {
1609 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1610 	uio_t auio = *auiop;
1611 	int error;
1612 
1613 	if (is_p_64bit_process) {
1614 		struct user64_msghdr_x msghdrx64;
1615 
1616 		error = copyin((user_addr_t)user_msghdr_x_src,
1617 		    &msghdrx64, sizeof(msghdrx64));
1618 		if (error != 0) {
1619 			DBG_PRINTF("%s copyin() msghdrx64 failed %d",
1620 			    __func__, error);
1621 			goto done;
1622 		}
1623 		user_msg->msg_name = msghdrx64.msg_name;
1624 		user_msg->msg_namelen = msghdrx64.msg_namelen;
1625 		user_msg->msg_iov = msghdrx64.msg_iov;
1626 		user_msg->msg_iovlen = msghdrx64.msg_iovlen;
1627 		user_msg->msg_control = msghdrx64.msg_control;
1628 		user_msg->msg_controllen = msghdrx64.msg_controllen;
1629 	} else {
1630 		struct user32_msghdr_x msghdrx32;
1631 
1632 		error = copyin((user_addr_t)user_msghdr_x_src,
1633 		    &msghdrx32, sizeof(msghdrx32));
1634 		if (error != 0) {
1635 			DBG_PRINTF("%s copyin() msghdrx32 failed %d",
1636 			    __func__, error);
1637 			goto done;
1638 		}
1639 		user_msg->msg_name = msghdrx32.msg_name;
1640 		user_msg->msg_namelen = msghdrx32.msg_namelen;
1641 		user_msg->msg_iov = msghdrx32.msg_iov;
1642 		user_msg->msg_iovlen = msghdrx32.msg_iovlen;
1643 		user_msg->msg_control = msghdrx32.msg_control;
1644 		user_msg->msg_controllen = msghdrx32.msg_controllen;
1645 	}
1646 	/* msg_flags is ignored for send */
1647 	user_msg->msg_flags = 0;
1648 
1649 	if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
1650 		error = EMSGSIZE;
1651 		DBG_PRINTF("%s bad msg_iovlen, error %d",
1652 		    __func__, error);
1653 		goto done;
1654 	}
1655 	/*
1656 	 * Attempt to reuse the uio if large enough, otherwise we need
1657 	 * a new one
1658 	 */
1659 	if (auio != NULL) {
1660 		if (auio->uio_max_iovs >= user_msg->msg_iovlen) {
1661 			uio_reset_fast(auio, 0,
1662 			    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1663 			    UIO_WRITE);
1664 		} else {
1665 			uio_free(auio);
1666 			auio = NULL;
1667 		}
1668 	}
1669 	if (auio == NULL) {
1670 		auio = uio_create(user_msg->msg_iovlen, 0,
1671 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1672 		    UIO_WRITE);
1673 		if (auio == NULL) {
1674 			error = ENOBUFS;
1675 			DBG_PRINTF("%s uio_create() failed %d",
1676 			    __func__, error);
1677 			goto done;
1678 		}
1679 	}
1680 
1681 	if (user_msg->msg_iovlen) {
1682 		/*
1683 		 * get location of iovecs within the uio.
1684 		 * then copyin the iovecs from user space.
1685 		 */
1686 		struct user_iovec *iovp = uio_iovsaddr_user(auio);
1687 		if (iovp == NULL) {
1688 			error = ENOBUFS;
1689 			goto done;
1690 		}
1691 		error = copyin_user_iovec_array(user_msg->msg_iov,
1692 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1693 		    user_msg->msg_iovlen, iovp);
1694 		if (error != 0) {
1695 			goto done;
1696 		}
1697 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
1698 
1699 		/* finish setup of uio_t */
1700 		error = uio_calculateresid_user(auio);
1701 		if (error) {
1702 			goto done;
1703 		}
1704 	} else {
1705 		user_msg->msg_iov = 0;
1706 	}
1707 
1708 done:
1709 	*auiop = auio;
1710 	return error;
1711 }
1712 
1713 static int
mbuf_packet_from_uio(socket_ref_t so,mbuf_ref_ref_t mp,uio_t auio)1714 mbuf_packet_from_uio(socket_ref_t so, mbuf_ref_ref_t mp, uio_t auio)
1715 {
1716 	int error = 0;
1717 	uint16_t headroom = 0;
1718 	size_t bytes_to_alloc;
1719 	mbuf_ref_t top = NULL, m;
1720 
1721 	if (soreserveheadroom != 0) {
1722 		headroom = so->so_pktheadroom;
1723 	}
1724 	bytes_to_alloc = headroom + uio_resid(auio);
1725 
1726 	error = mbuf_allocpacket(MBUF_WAITOK, bytes_to_alloc, NULL, &top);
1727 	if (error != 0) {
1728 		os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: mbuf_allocpacket %zu error %d",
1729 		    bytes_to_alloc, error);
1730 		goto done;
1731 	}
1732 
1733 	if (headroom > 0 && headroom < mbuf_maxlen(top)) {
1734 		top->m_data += headroom;
1735 	}
1736 
1737 	for (m = top; m != NULL; m = m->m_next) {
1738 		int bytes_to_copy = (int)uio_resid(auio);
1739 		ssize_t mlen;
1740 
1741 		if ((m->m_flags & M_EXT)) {
1742 			mlen = m->m_ext.ext_size -
1743 			    M_LEADINGSPACE(m);
1744 		} else if ((m->m_flags & M_PKTHDR)) {
1745 			mlen = MHLEN - M_LEADINGSPACE(m);
1746 			m_add_crumb(m, PKT_CRUMB_SOSEND);
1747 		} else {
1748 			mlen = MLEN - M_LEADINGSPACE(m);
1749 		}
1750 		int len = imin((int)mlen, bytes_to_copy);
1751 
1752 		error = uio_copyin_user(mtod(m, caddr_t), (int)len, auio);
1753 		if (error != 0) {
1754 			os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
1755 			    len, error);
1756 			goto done;
1757 		}
1758 		m->m_len = len;
1759 		top->m_pkthdr.len += len;
1760 	}
1761 
1762 done:
1763 	if (error != 0) {
1764 		m_freem(top);
1765 	} else {
1766 		*mp = top;
1767 	}
1768 	return error;
1769 }
1770 
1771 static int
sendit_x(proc_ref_t p,socket_ref_t so,struct sendmsg_x_args * uap,u_int * retval)1772 sendit_x(proc_ref_t p, socket_ref_t so, struct sendmsg_x_args *uap, u_int *retval)
1773 {
1774 	int error = 0;
1775 	uio_t __single auio = NULL;
1776 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1777 	void *src;
1778 	MBUFQ_HEAD() pktlist = {};
1779 	size_t total_pkt_len = 0;
1780 	u_int pkt_cnt = 0;
1781 	int flags = uap->flags;
1782 	mbuf_ref_t top;
1783 
1784 	MBUFQ_INIT(&pktlist);
1785 
1786 	*retval = 0;
1787 
1788 	/* We re-use the uio when possible */
1789 	auio = uio_create(1, 0,
1790 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1791 	    UIO_WRITE);
1792 	if (auio == NULL) {
1793 		error = ENOBUFS;
1794 		DBG_PRINTF("%s uio_create() failed %d",
1795 		    __func__, error);
1796 		goto done;
1797 	}
1798 
1799 	src = __unsafe_forge_bidi_indexable(void *, uap->msgp, uap->cnt);
1800 
1801 	/*
1802 	 * Create a list of packets
1803 	 */
1804 	for (u_int i = 0; i < uap->cnt; i++) {
1805 		struct user_msghdr user_msg = {};
1806 		mbuf_ref_t m = NULL;
1807 
1808 		if (is_p_64bit_process) {
1809 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
1810 			if (error != 0) {
1811 				os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1812 				goto done;
1813 			}
1814 		} else {
1815 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
1816 			if (error != 0) {
1817 				os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1818 				goto done;
1819 			}
1820 		}
1821 		/*
1822 		 * Stop on the first datagram that is too large
1823 		 */
1824 		if (uio_resid(auio) > so->so_snd.sb_hiwat) {
1825 			if (i == 0) {
1826 				error = EMSGSIZE;
1827 				goto done;
1828 			}
1829 			break;
1830 		}
1831 		/*
1832 		 * An mbuf packet has the control mbuf(s) followed by data
1833 		 * We allocate the mbufs in reverse order
1834 		 */
1835 		error = mbuf_packet_from_uio(so, &m, auio);
1836 		if (error != 0) {
1837 			os_log(OS_LOG_DEFAULT, "sendit_x: mbuf_packet_from_uio error %d\n", error);
1838 			goto done;
1839 		}
1840 		total_pkt_len += m->m_pkthdr.len;
1841 
1842 		if (user_msg.msg_control != USER_ADDR_NULL && user_msg.msg_controllen != 0) {
1843 			mbuf_ref_t control = NULL;
1844 
1845 			error = sockargs(&control, user_msg.msg_control, user_msg.msg_controllen, MT_CONTROL);
1846 			if (error != 0) {
1847 				os_log(OS_LOG_DEFAULT, "sendit_x: sockargs error %d\n", error);
1848 				goto done;
1849 			}
1850 			control->m_next = m;
1851 			m = control;
1852 		}
1853 		MBUFQ_ENQUEUE(&pktlist, m);
1854 
1855 		pkt_cnt += 1;
1856 	}
1857 
1858 	top = MBUFQ_FIRST(&pktlist);
1859 	MBUFQ_INIT(&pktlist);
1860 	error = sosend_list(so, top, total_pkt_len, &pkt_cnt, flags);
1861 	if (error != 0 && error != ENOBUFS) {
1862 		os_log(OS_LOG_DEFAULT, "sendit_x: sosend_list error %d\n", error);
1863 	}
1864 done:
1865 	*retval = pkt_cnt;
1866 
1867 	if (auio != NULL) {
1868 		uio_free(auio);
1869 	}
1870 	MBUFQ_DRAIN(&pktlist);
1871 	return error;
1872 }
1873 
1874 int
sendmsg_x(proc_ref_t p,struct sendmsg_x_args * uap,user_ssize_t * retval)1875 sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1876 {
1877 	void *src;
1878 	int error;
1879 	uio_t __single auio = NULL;
1880 	socket_ref_t so;
1881 	u_int uiocnt = 0;
1882 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1883 
1884 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1885 	AUDIT_ARG(fd, uap->s);
1886 
1887 	if (uap->flags & MSG_SKIPCFIL) {
1888 		error = EPERM;
1889 		goto done_no_filedrop;
1890 	}
1891 
1892 	error = file_socket(uap->s, &so);
1893 	if (error) {
1894 		goto done_no_filedrop;
1895 	}
1896 	if (so == NULL) {
1897 		error = EBADF;
1898 		goto done;
1899 	}
1900 
1901 	/*
1902 	 * For an atomic datagram connected socket we can build the list of
1903 	 * mbuf packets with sosend_list()
1904 	 */
1905 	if (so->so_type == SOCK_DGRAM && sosendallatonce(so) &&
1906 	    (so->so_state & SS_ISCONNECTED) && sendmsg_x_mode != 1) {
1907 		error = sendit_x(p, so, uap, &uiocnt);
1908 		if (error != 0) {
1909 			DBG_PRINTF("%s sendit_x() failed %d",
1910 			    __func__, error);
1911 		}
1912 		goto done;
1913 	}
1914 
1915 	src = __unsafe_forge_bidi_indexable(void *, uap->msgp, uap->cnt);
1916 
1917 	/* We re-use the uio when possible */
1918 	auio = uio_create(1, 0,
1919 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1920 	    UIO_WRITE);
1921 	if (auio == NULL) {
1922 		error = ENOBUFS;
1923 		DBG_PRINTF("%s uio_create() failed %d",
1924 		    __func__, error);
1925 		goto done;
1926 	}
1927 
1928 	for (u_int i = 0; i < uap->cnt; i++) {
1929 		struct user_msghdr user_msg = {};
1930 
1931 		if (is_p_64bit_process) {
1932 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
1933 			if (error != 0) {
1934 				goto done;
1935 			}
1936 		} else {
1937 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
1938 			if (error != 0) {
1939 				goto done;
1940 			}
1941 		}
1942 
1943 		int32_t len = 0;
1944 		error = sendit(p, so, &user_msg, auio, uap->flags, &len);
1945 		if (error != 0) {
1946 			break;
1947 		}
1948 		uiocnt += 1;
1949 	}
1950 done:
1951 	if (error != 0) {
1952 		if (uiocnt != 0 && (error == ERESTART ||
1953 		    error == EINTR || error == EWOULDBLOCK ||
1954 		    error == ENOBUFS || error == EMSGSIZE)) {
1955 			error = 0;
1956 		}
1957 		/* Generation of SIGPIPE can be controlled per socket */
1958 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1959 		    !(uap->flags & MSG_NOSIGNAL)) {
1960 			psignal(p, SIGPIPE);
1961 		}
1962 	}
1963 	if (error == 0) {
1964 		*retval = (int)(uiocnt);
1965 	}
1966 	file_drop(uap->s);
1967 
1968 done_no_filedrop:
1969 	if (auio != NULL) {
1970 		uio_free(auio);
1971 	}
1972 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1973 
1974 	return error;
1975 }
1976 
1977 
1978 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)1979 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
1980 {
1981 	int error = 0;
1982 	socklen_t sa_len = 0;
1983 	ssize_t len;
1984 
1985 	len = *namelen;
1986 	if (len <= 0 || fromsa == 0) {
1987 		len = 0;
1988 	} else {
1989 #ifndef MIN
1990 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1991 #endif
1992 		sa_len = fromsa->sa_len;
1993 		len = MIN((unsigned int)len, sa_len);
1994 		error = copyout(__SA_UTILS_CONV_TO_BYTES(fromsa), name, (unsigned)len);
1995 		if (error) {
1996 			goto out;
1997 		}
1998 	}
1999 	*namelen = sa_len;
2000 out:
2001 	return 0;
2002 }
2003 
2004 static int
copyout_maddr(struct mbuf * m,user_addr_t name,socklen_t * namelen)2005 copyout_maddr(struct mbuf *m, user_addr_t name, socklen_t *namelen)
2006 {
2007 	int error = 0;
2008 	socklen_t sa_len = 0;
2009 	ssize_t len;
2010 
2011 	len = *namelen;
2012 	if (len <= 0 || m == NULL) {
2013 		len = 0;
2014 	} else {
2015 #ifndef MIN
2016 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2017 #endif
2018 		struct sockaddr *fromsa = mtod(m, struct sockaddr *);
2019 
2020 		sa_len = fromsa->sa_len;
2021 		len = MIN((unsigned int)len, sa_len);
2022 		error = copyout(fromsa, name, (unsigned)len);
2023 		if (error != 0) {
2024 			goto out;
2025 		}
2026 	}
2027 	*namelen = sa_len;
2028 out:
2029 	return 0;
2030 }
2031 
2032 static int
copyout_control(proc_ref_t p,mbuf_ref_t m,user_addr_t control,socklen_ref_t controllen,int_ref_t flags,socket_ref_t so)2033 copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
2034     socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
2035 {
2036 	int error = 0;
2037 	socklen_t len;
2038 	user_addr_t ctlbuf;
2039 	struct inpcb *inp = NULL;
2040 	bool want_pktinfo = false;
2041 	bool seen_pktinfo = false;
2042 
2043 	if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
2044 		inp = sotoinpcb(so);
2045 		want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
2046 	}
2047 
2048 	len = *controllen;
2049 	*controllen = 0;
2050 	ctlbuf = control;
2051 
2052 	while (m && len > 0) {
2053 		socklen_t tocopy;
2054 		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
2055 		socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
2056 		socklen_t buflen = m->m_len;
2057 
2058 		while (buflen > 0 && len > 0) {
2059 			/*
2060 			 * SCM_TIMESTAMP hack because  struct timeval has a
2061 			 * different size for 32 bits and 64 bits processes
2062 			 */
2063 			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
2064 				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
2065 				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
2066 				socklen_t tmp_space;
2067 				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
2068 
2069 				tmp_cp->cmsg_level = SOL_SOCKET;
2070 				tmp_cp->cmsg_type = SCM_TIMESTAMP;
2071 
2072 				if (proc_is64bit(p)) {
2073 					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
2074 
2075 					os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
2076 					os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
2077 
2078 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
2079 					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
2080 				} else {
2081 					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
2082 
2083 					tv32->tv_sec = (user32_time_t)tv->tv_sec;
2084 					tv32->tv_usec = tv->tv_usec;
2085 
2086 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
2087 					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
2088 				}
2089 				if (len >= tmp_space) {
2090 					tocopy = tmp_space;
2091 				} else {
2092 					*flags |= MSG_CTRUNC;
2093 					tocopy = len;
2094 				}
2095 				error = copyout(tmp_buffer, ctlbuf, tocopy);
2096 				if (error) {
2097 					goto out;
2098 				}
2099 			} else {
2100 				/* If socket has flow tracking and socket did not request address, ignore it */
2101 				if (SOFLOW_ENABLED(so) &&
2102 				    ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
2103 				    !(inp->inp_flags & INP_RECVDSTADDR)) ||
2104 				    (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
2105 				    !(inp->inp_flags & IN6P_PKTINFO)))) {
2106 					tocopy = 0;
2107 				} else {
2108 					if (cp_size > buflen) {
2109 						panic("cp_size > buflen, something wrong with alignment!");
2110 					}
2111 					if (len >= cp_size) {
2112 						tocopy = cp_size;
2113 					} else {
2114 						*flags |= MSG_CTRUNC;
2115 						tocopy = len;
2116 					}
2117 					error = copyout((caddr_t) cp, ctlbuf, tocopy);
2118 					if (error) {
2119 						goto out;
2120 					}
2121 					if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
2122 					    (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
2123 						seen_pktinfo = true;
2124 					}
2125 				}
2126 			}
2127 
2128 
2129 			ctlbuf += tocopy;
2130 			len -= tocopy;
2131 
2132 			buflen -= cp_size;
2133 			cp = (struct cmsghdr *)(void *)
2134 			    ((unsigned char *) cp + cp_size);
2135 			cp_size = CMSG_ALIGN(cp->cmsg_len);
2136 		}
2137 
2138 		m = m->m_next;
2139 	}
2140 	*controllen = (socklen_t)(ctlbuf - control);
2141 out:
2142 	if (want_pktinfo && !seen_pktinfo) {
2143 		missingpktinfo += 1;
2144 #if (DEBUG || DEVELOPMENT)
2145 		char pname[MAXCOMLEN];
2146 		char local[MAX_IPv6_STR_LEN + 6];
2147 		char remote[MAX_IPv6_STR_LEN + 6];
2148 
2149 		proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
2150 		if (inp->inp_vflag & INP_IPV6) {
2151 			inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
2152 			inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
2153 		} else {
2154 			inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
2155 			inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
2156 		}
2157 
2158 		os_log(OS_LOG_DEFAULT,
2159 		    "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
2160 		    local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
2161 		    pname, so->last_pid, error);
2162 #endif /* (DEBUG || DEVELOPMENT) */
2163 	}
2164 	return error;
2165 }
2166 
2167 /*
2168  * Returns:	0			Success
2169  *		ENOTSOCK
2170  *		EINVAL
2171  *		EBADF
2172  *		EACCES			Mandatory Access Control failure
2173  *	copyout:EFAULT
2174  *	fp_lookup:EBADF
2175  *	<pru_soreceive>:ENOBUFS
2176  *	<pru_soreceive>:ENOTCONN
2177  *	<pru_soreceive>:EWOULDBLOCK
2178  *	<pru_soreceive>:EFAULT
2179  *	<pru_soreceive>:EINTR
2180  *	<pru_soreceive>:EBADF
2181  *	<pru_soreceive>:EINVAL
2182  *	<pru_soreceive>:EMSGSIZE
2183  *	<pru_soreceive>:???
2184  *
2185  * Notes:	Additional return values from calls through <pru_soreceive>
2186  *		depend on protocols other than TCP or AF_UNIX, which are
2187  *		documented above.
2188  */
2189 static int
recvit(proc_ref_t p,int s,user_msghdr_ref_t mp,uio_t uiop,user_addr_t namelenp,int32_ref_t retval)2190 recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
2191     user_addr_t namelenp, int32_ref_t retval)
2192 {
2193 	ssize_t len;
2194 	int error;
2195 	mbuf_ref_t  control = 0;
2196 	socket_ref_t so;
2197 	sockaddr_ref_t  fromsa = 0;
2198 	fileproc_ref_t  fp;
2199 
2200 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2201 	if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
2202 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2203 		return error;
2204 	}
2205 	so = (struct socket *)fp_get_data(fp);
2206 
2207 #if CONFIG_MACF_SOCKET_SUBSET
2208 	/*
2209 	 * We check the state without holding the socket lock;
2210 	 * if a race condition occurs, it would simply result
2211 	 * in an extra call to the MAC check function.
2212 	 */
2213 	if (!(so->so_state & SS_DEFUNCT) &&
2214 	    !(so->so_state & SS_ISCONNECTED) &&
2215 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2216 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2217 		goto out1;
2218 	}
2219 #endif /* MAC_SOCKET_SUBSET */
2220 	if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
2221 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
2222 		error = EINVAL;
2223 		goto out1;
2224 	}
2225 
2226 	len = uio_resid(uiop);
2227 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2228 	    NULL, mp->msg_control ? &control : NULL,
2229 	    &mp->msg_flags);
2230 	if (fromsa) {
2231 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2232 		    fromsa);
2233 	}
2234 	if (error) {
2235 		if (uio_resid(uiop) != len && (error == ERESTART ||
2236 		    error == EINTR || error == EWOULDBLOCK)) {
2237 			error = 0;
2238 		}
2239 	}
2240 	if (error) {
2241 		goto out;
2242 	}
2243 
2244 	*retval = (int32_t)(len - uio_resid(uiop));
2245 
2246 	if (mp->msg_name) {
2247 		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
2248 		if (error) {
2249 			goto out;
2250 		}
2251 		/* return the actual, untruncated address length */
2252 		if (namelenp &&
2253 		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2254 		    sizeof(int)))) {
2255 			goto out;
2256 		}
2257 	}
2258 
2259 	if (mp->msg_control) {
2260 		error = copyout_control(p, control, mp->msg_control,
2261 		    &mp->msg_controllen, &mp->msg_flags, so);
2262 	}
2263 out:
2264 	free_sockaddr(fromsa);
2265 	if (control) {
2266 		m_freem(control);
2267 	}
2268 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2269 out1:
2270 	fp_drop(p, s, fp, 0);
2271 	return error;
2272 }
2273 
2274 /*
2275  * Returns:	0			Success
2276  *		ENOMEM
2277  *	copyin:EFAULT
2278  *	recvit:???
2279  *	read:???			[4056224: applicable for pipes]
2280  *
2281  * Notes:	The read entry point is only called as part of support for
2282  *		binary backward compatibility; new code should use read
2283  *		instead of recv or recvfrom when attempting to read data
2284  *		from pipes.
2285  *
2286  *		For full documentation of the return codes from recvit, see
2287  *		the block header for the recvit function.
2288  */
2289 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2290 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2291 {
2292 	__pthread_testcancel(1);
2293 	return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2294 	           retval);
2295 }
2296 
2297 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2298 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2299     int32_ref_t retval)
2300 {
2301 	struct user_msghdr msg;
2302 	int error;
2303 	uio_t __single auio = NULL;
2304 
2305 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2306 	AUDIT_ARG(fd, uap->s);
2307 
2308 	if (uap->fromlenaddr) {
2309 		error = copyin(uap->fromlenaddr,
2310 		    (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2311 		if (error) {
2312 			return error;
2313 		}
2314 	} else {
2315 		msg.msg_namelen = 0;
2316 	}
2317 	msg.msg_name = uap->from;
2318 	auio = uio_create(1, 0,
2319 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2320 	    UIO_READ);
2321 	if (auio == NULL) {
2322 		return ENOMEM;
2323 	}
2324 
2325 	uio_addiov(auio, uap->buf, uap->len);
2326 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
2327 	msg.msg_iov = 0;
2328 	msg.msg_iovlen = 0;
2329 	msg.msg_control = 0;
2330 	msg.msg_controllen = 0;
2331 	msg.msg_flags = uap->flags;
2332 	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2333 	if (auio != NULL) {
2334 		uio_free(auio);
2335 	}
2336 
2337 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2338 
2339 	return error;
2340 }
2341 
2342 /*
2343  * Returns:	0			Success
2344  *		EMSGSIZE
2345  *		ENOMEM
2346  *	copyin:EFAULT
2347  *	copyout:EFAULT
2348  *	recvit:???
2349  *
2350  * Notes:	For full documentation of the return codes from recvit, see
2351  *		the block header for the recvit function.
2352  */
2353 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2354 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2355 {
2356 	__pthread_testcancel(1);
2357 	return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2358 	           retval);
2359 }
2360 
2361 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2362 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2363     int32_ref_t retval)
2364 {
2365 	struct user32_msghdr msg32;
2366 	struct user64_msghdr msg64;
2367 	struct user_msghdr user_msg;
2368 	caddr_t msghdrp;
2369 	int     size_of_msghdr;
2370 	user_addr_t uiov;
2371 	int error;
2372 	uio_t __single auio = NULL;
2373 	struct user_iovec *iovp;
2374 
2375 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2376 
2377 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2378 	AUDIT_ARG(fd, uap->s);
2379 	if (is_p_64bit_process) {
2380 		msghdrp = (caddr_t)&msg64;
2381 		size_of_msghdr = sizeof(msg64);
2382 	} else {
2383 		msghdrp = (caddr_t)&msg32;
2384 		size_of_msghdr = sizeof(msg32);
2385 	}
2386 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
2387 	if (error) {
2388 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2389 		return error;
2390 	}
2391 
2392 	/* only need to copy if user process is not 64-bit */
2393 	if (is_p_64bit_process) {
2394 		user_msg.msg_flags = msg64.msg_flags;
2395 		user_msg.msg_controllen = msg64.msg_controllen;
2396 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
2397 		user_msg.msg_iovlen = msg64.msg_iovlen;
2398 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2399 		user_msg.msg_namelen = msg64.msg_namelen;
2400 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
2401 	} else {
2402 		user_msg.msg_flags = msg32.msg_flags;
2403 		user_msg.msg_controllen = msg32.msg_controllen;
2404 		user_msg.msg_control = msg32.msg_control;
2405 		user_msg.msg_iovlen = msg32.msg_iovlen;
2406 		user_msg.msg_iov = msg32.msg_iov;
2407 		user_msg.msg_namelen = msg32.msg_namelen;
2408 		user_msg.msg_name = msg32.msg_name;
2409 	}
2410 
2411 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2412 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2413 		    0, 0, 0, 0);
2414 		return EMSGSIZE;
2415 	}
2416 
2417 	user_msg.msg_flags = uap->flags;
2418 
2419 	/* allocate a uio large enough to hold the number of iovecs passed */
2420 	auio = uio_create(user_msg.msg_iovlen, 0,
2421 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2422 	    UIO_READ);
2423 	if (auio == NULL) {
2424 		error = ENOMEM;
2425 		goto done;
2426 	}
2427 
2428 	/*
2429 	 * get location of iovecs within the uio.  then copyin the iovecs from
2430 	 * user space.
2431 	 */
2432 	iovp = uio_iovsaddr_user(auio);
2433 	if (iovp == NULL) {
2434 		error = ENOMEM;
2435 		goto done;
2436 	}
2437 	uiov = user_msg.msg_iov;
2438 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2439 	error = copyin_user_iovec_array(uiov,
2440 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2441 	    user_msg.msg_iovlen, iovp);
2442 	if (error) {
2443 		goto done;
2444 	}
2445 
2446 	/* finish setup of uio_t */
2447 	error = uio_calculateresid_user(auio);
2448 	if (error) {
2449 		goto done;
2450 	}
2451 
2452 	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2453 	if (!error) {
2454 		user_msg.msg_iov = uiov;
2455 		if (is_p_64bit_process) {
2456 			msg64.msg_flags = user_msg.msg_flags;
2457 			msg64.msg_controllen = user_msg.msg_controllen;
2458 			msg64.msg_control = user_msg.msg_control;
2459 			msg64.msg_iovlen = user_msg.msg_iovlen;
2460 			msg64.msg_iov = user_msg.msg_iov;
2461 			msg64.msg_namelen = user_msg.msg_namelen;
2462 			msg64.msg_name = user_msg.msg_name;
2463 		} else {
2464 			msg32.msg_flags = user_msg.msg_flags;
2465 			msg32.msg_controllen = user_msg.msg_controllen;
2466 			msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2467 			msg32.msg_iovlen = user_msg.msg_iovlen;
2468 			msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2469 			msg32.msg_namelen = user_msg.msg_namelen;
2470 			msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2471 		}
2472 		error = copyout(msghdrp, uap->msg, size_of_msghdr);
2473 	}
2474 done:
2475 	if (auio != NULL) {
2476 		uio_free(auio);
2477 	}
2478 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2479 	return error;
2480 }
2481 
2482 __attribute__((noinline))
2483 static int
recvmsg_x_array(proc_ref_t p,socket_ref_t so,struct recvmsg_x_args * uap,user_ssize_t * retval)2484 recvmsg_x_array(proc_ref_t p, socket_ref_t so, struct recvmsg_x_args *uap, user_ssize_t *retval)
2485 {
2486 	int error = EOPNOTSUPP;
2487 	user_msghdr_x_ptr_t user_msg_x = NULL;
2488 	recv_msg_elem_ptr_t recv_msg_array = NULL;
2489 	user_ssize_t len_before = 0, len_after;
2490 	size_t size_of_msghdr;
2491 	void_ptr_t umsgp = NULL;
2492 	u_int i;
2493 	u_int uiocnt;
2494 	int flags = uap->flags;
2495 
2496 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2497 
2498 	size_of_msghdr = is_p_64bit_process ?
2499 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2500 
2501 	/*
2502 	 * Support only a subset of message flags
2503 	 */
2504 	if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |  MSG_NBIO)) {
2505 		return EOPNOTSUPP;
2506 	}
2507 	/*
2508 	 * Input parameter range check
2509 	 */
2510 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2511 		error = EINVAL;
2512 		goto out;
2513 	}
2514 	if (uap->cnt > somaxrecvmsgx) {
2515 		uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2516 	}
2517 
2518 	user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
2519 	    Z_WAITOK | Z_ZERO);
2520 	if (user_msg_x == NULL) {
2521 		DBG_PRINTF("%s user_msg_x alloc failed", __func__);
2522 		error = ENOMEM;
2523 		goto out;
2524 	}
2525 	recv_msg_array = alloc_recv_msg_array(uap->cnt);
2526 	if (recv_msg_array == NULL) {
2527 		DBG_PRINTF("%s alloc_recv_msg_array() failed", __func__);
2528 		error = ENOMEM;
2529 		goto out;
2530 	}
2531 
2532 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2533 	if (umsgp == NULL) {
2534 		DBG_PRINTF("%s umsgp alloc failed", __func__);
2535 		error = ENOMEM;
2536 		goto out;
2537 	}
2538 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2539 	if (error) {
2540 		DBG_PRINTF("%s copyin() failed", __func__);
2541 		goto out;
2542 	}
2543 	error = internalize_recv_msghdr_array(umsgp,
2544 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2545 	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2546 	if (error) {
2547 		DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
2548 		goto out;
2549 	}
2550 	/*
2551 	 * Make sure the size of each message iovec and
2552 	 * the aggregate size of all the iovec is valid
2553 	 */
2554 	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2555 		error = EINVAL;
2556 		goto out;
2557 	}
2558 	/*
2559 	 * Sanity check on passed arguments
2560 	 */
2561 	for (i = 0; i < uap->cnt; i++) {
2562 		struct user_msghdr_x *mp = user_msg_x + i;
2563 
2564 		if (mp->msg_flags != 0) {
2565 			error = EINVAL;
2566 			goto out;
2567 		}
2568 	}
2569 #if CONFIG_MACF_SOCKET_SUBSET
2570 	/*
2571 	 * We check the state without holding the socket lock;
2572 	 * if a race condition occurs, it would simply result
2573 	 * in an extra call to the MAC check function.
2574 	 */
2575 	if (!(so->so_state & SS_DEFUNCT) &&
2576 	    !(so->so_state & SS_ISCONNECTED) &&
2577 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2578 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2579 		goto out;
2580 	}
2581 #endif /* MAC_SOCKET_SUBSET */
2582 
2583 	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2584 
2585 	for (i = 0; i < uap->cnt; i++) {
2586 		struct recv_msg_elem *recv_msg_elem;
2587 		uio_t auio;
2588 		sockaddr_ref_ref_t psa;
2589 		struct mbuf **controlp;
2590 
2591 		recv_msg_elem = recv_msg_array + i;
2592 		auio = recv_msg_elem->uio;
2593 
2594 		/*
2595 		 * Do not block if we got at least one packet
2596 		 */
2597 		if (i > 0) {
2598 			flags |= MSG_DONTWAIT;
2599 		}
2600 
2601 		psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2602 		    &recv_msg_elem->psa : NULL;
2603 		controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2604 		    &recv_msg_elem->controlp : NULL;
2605 
2606 		error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2607 		    auio, NULL, controlp, &flags);
2608 		if (error) {
2609 			break;
2610 		}
2611 		/*
2612 		 * We have some data
2613 		 */
2614 		recv_msg_elem->which |= SOCK_MSG_DATA;
2615 		/*
2616 		 * Set the messages flags for this packet
2617 		 */
2618 		flags &= ~MSG_DONTWAIT;
2619 		recv_msg_elem->flags = flags;
2620 		/*
2621 		 * Stop on partial copy
2622 		 */
2623 		if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2624 			break;
2625 		}
2626 	}
2627 
2628 	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2629 
2630 	if (error) {
2631 		if (len_after != len_before && (error == ERESTART ||
2632 		    error == EINTR || error == EWOULDBLOCK)) {
2633 			error = 0;
2634 		} else {
2635 			goto out;
2636 		}
2637 	}
2638 
2639 	uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2640 	    uap->cnt, user_msg_x, recv_msg_array, &error);
2641 	if (error != 0) {
2642 		goto out;
2643 	}
2644 
2645 	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2646 	if (error) {
2647 		DBG_PRINTF("%s copyout() failed", __func__);
2648 		goto out;
2649 	}
2650 	*retval = (int)(uiocnt);
2651 
2652 out:
2653 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
2654 	free_recv_msg_array(recv_msg_array, uap->cnt);
2655 	kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);
2656 
2657 	return error;
2658 }
2659 
2660 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2661 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2662 {
2663 	int error = EOPNOTSUPP;
2664 	socket_ref_t so;
2665 	size_t size_of_msghdrx;
2666 	caddr_t msghdrxp;
2667 	struct user32_msghdr_x msghdrx32 = {};
2668 	struct user64_msghdr_x msghdrx64 = {};
2669 	int spacetype;
2670 	u_int i;
2671 	uio_t auio = NULL;
2672 	caddr_t src;
2673 	int flags;
2674 	mbuf_ref_t pkt_list = NULL, m;
2675 	mbuf_ref_t addr_list = NULL, m_addr;
2676 	mbuf_ref_t ctl_list = NULL, control;
2677 	u_int pktcnt;
2678 
2679 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2680 
2681 	error = file_socket(uap->s, &so);
2682 	if (error) {
2683 		goto done_no_filedrop;
2684 	}
2685 	if (so == NULL) {
2686 		error = EBADF;
2687 		goto done;
2688 	}
2689 
2690 #if CONFIG_MACF_SOCKET_SUBSET
2691 	/*
2692 	 * We check the state without holding the socket lock;
2693 	 * if a race condition occurs, it would simply result
2694 	 * in an extra call to the MAC check function.
2695 	 */
2696 	if (!(so->so_state & SS_DEFUNCT) &&
2697 	    !(so->so_state & SS_ISCONNECTED) &&
2698 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2699 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2700 		goto done;
2701 	}
2702 #endif /* MAC_SOCKET_SUBSET */
2703 
2704 	/*
2705 	 * With soreceive_m_list, all packets must be uniform, with address and
2706 	 * control as they are returned in parallel lists and it's only guaranteed
2707 	 * when pru_send_list is supported
2708 	 */
2709 	if (do_recvmsg_x_donttrunc != 0 || (so->so_options & SO_DONTTRUNC)) {
2710 		error = recvmsg_x_array(p, so, uap, retval);
2711 		goto done;
2712 	}
2713 
2714 	/*
2715 	 * Input parameter range check
2716 	 */
2717 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2718 		error = EINVAL;
2719 		goto done;
2720 	}
2721 	if (uap->cnt > somaxrecvmsgx) {
2722 		uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2723 	}
2724 
2725 	if (IS_64BIT_PROCESS(p)) {
2726 		msghdrxp = (caddr_t)&msghdrx64;
2727 		size_of_msghdrx = sizeof(struct user64_msghdr_x);
2728 		spacetype = UIO_USERSPACE64;
2729 	} else {
2730 		msghdrxp = (caddr_t)&msghdrx32;
2731 		size_of_msghdrx = sizeof(struct user32_msghdr_x);
2732 		spacetype = UIO_USERSPACE32;
2733 	}
2734 	src = __unsafe_forge_bidi_indexable(caddr_t, uap->msgp, uap->cnt);
2735 
2736 	flags = uap->flags;
2737 
2738 	/*
2739 	 * Only allow MSG_DONTWAIT
2740 	 */
2741 	if ((flags & ~(MSG_DONTWAIT | MSG_NBIO)) != 0) {
2742 		error = EINVAL;
2743 		goto done;
2744 	}
2745 
2746 	/*
2747 	 * Receive list of packet in a single call
2748 	 */
2749 	pktcnt = uap->cnt;
2750 	error = soreceive_m_list(so, &pktcnt, &addr_list, &pkt_list, &ctl_list,
2751 	    &flags);
2752 	if (error != 0) {
2753 		if (pktcnt != 0 && (error == ERESTART ||
2754 		    error == EINTR || error == EWOULDBLOCK)) {
2755 			error = 0;
2756 		} else {
2757 			goto done;
2758 		}
2759 	}
2760 
2761 	m_addr = addr_list;
2762 	m = pkt_list;
2763 	control = ctl_list;
2764 
2765 	for (i = 0; i < pktcnt; i++) {
2766 		struct user_msghdr user_msg;
2767 		ssize_t len;
2768 		struct user_iovec *iovp;
2769 		struct mbuf *n;
2770 
2771 		if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
2772 			panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
2773 		}
2774 
2775 		error = copyin((user_addr_t)(src + i * size_of_msghdrx),
2776 		    msghdrxp, size_of_msghdrx);
2777 		if (error) {
2778 			DBG_PRINTF("%s copyin() msghdrx failed %d\n",
2779 			    __func__, error);
2780 			goto done;
2781 		}
2782 		if (spacetype == UIO_USERSPACE64) {
2783 			user_msg.msg_name = msghdrx64.msg_name;
2784 			user_msg.msg_namelen = msghdrx64.msg_namelen;
2785 			user_msg.msg_iov = msghdrx64.msg_iov;
2786 			user_msg.msg_iovlen = msghdrx64.msg_iovlen;
2787 			user_msg.msg_control = msghdrx64.msg_control;
2788 			user_msg.msg_controllen = msghdrx64.msg_controllen;
2789 		} else {
2790 			user_msg.msg_name = msghdrx32.msg_name;
2791 			user_msg.msg_namelen = msghdrx32.msg_namelen;
2792 			user_msg.msg_iov = msghdrx32.msg_iov;
2793 			user_msg.msg_iovlen = msghdrx32.msg_iovlen;
2794 			user_msg.msg_control = msghdrx32.msg_control;
2795 			user_msg.msg_controllen = msghdrx32.msg_controllen;
2796 		}
2797 		user_msg.msg_flags = 0;
2798 		if (user_msg.msg_iovlen <= 0 ||
2799 		    user_msg.msg_iovlen > UIO_MAXIOV) {
2800 			error = EMSGSIZE;
2801 			DBG_PRINTF("%s bad msg_iovlen, error %d\n",
2802 			    __func__, error);
2803 			goto done;
2804 		}
2805 		/*
2806 		 * Attempt to reuse the uio if large enough, otherwise we need
2807 		 * a new one
2808 		 */
2809 		if (auio != NULL) {
2810 			if (auio->uio_max_iovs <= user_msg.msg_iovlen) {
2811 				uio_reset_fast(auio, 0, spacetype, UIO_READ);
2812 			} else {
2813 				uio_free(auio);
2814 				auio = NULL;
2815 			}
2816 		}
2817 		if (auio == NULL) {
2818 			auio = uio_create(user_msg.msg_iovlen, 0, spacetype,
2819 			    UIO_READ);
2820 			if (auio == NULL) {
2821 				error = ENOBUFS;
2822 				DBG_PRINTF("%s uio_create() failed %d\n",
2823 				    __func__, error);
2824 				goto done;
2825 			}
2826 		}
2827 		/*
2828 		 * get location of iovecs within the uio then copy the iovecs
2829 		 * from user space.
2830 		 */
2831 		iovp = uio_iovsaddr_user(auio);
2832 		if (iovp == NULL) {
2833 			error = ENOMEM;
2834 			DBG_PRINTF("%s uio_iovsaddr() failed %d\n",
2835 			    __func__, error);
2836 			goto done;
2837 		}
2838 		error = copyin_user_iovec_array(user_msg.msg_iov,
2839 		    spacetype, user_msg.msg_iovlen, iovp);
2840 		if (error != 0) {
2841 			DBG_PRINTF("%s copyin_user_iovec_array() failed %d\n",
2842 			    __func__, error);
2843 			goto done;
2844 		}
2845 		error = uio_calculateresid_user(auio);
2846 		if (error != 0) {
2847 			DBG_PRINTF("%s uio_calculateresid() failed %d\n",
2848 			    __func__, error);
2849 			goto done;
2850 		}
2851 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2852 
2853 		len = uio_resid(auio);
2854 		for (n = m; n != NULL; n = n->m_next) {
2855 			user_ssize_t resid = uio_resid(auio);
2856 			if (resid < n->m_len) {
2857 				error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2858 				if (error != 0) {
2859 					DBG_PRINTF("%s uiomove() failed\n",
2860 					    __func__);
2861 					goto done;
2862 				}
2863 				flags |= MSG_TRUNC;
2864 				break;
2865 			}
2866 
2867 			error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2868 			if (error != 0) {
2869 				DBG_PRINTF("%s uiomove() failed\n",
2870 				    __func__);
2871 				goto done;
2872 			}
2873 		}
2874 		len -= uio_resid(auio);
2875 
2876 		if (user_msg.msg_name != 0 && user_msg.msg_namelen != 0) {
2877 			error = copyout_maddr(m_addr, user_msg.msg_name,
2878 			    &user_msg.msg_namelen);
2879 			if (error) {
2880 				DBG_PRINTF("%s copyout_maddr()  failed\n",
2881 				    __func__);
2882 				goto done;
2883 			}
2884 		}
2885 		if (user_msg.msg_control != 0 && user_msg.msg_controllen != 0) {
2886 			error = copyout_control(p, control,
2887 			    user_msg.msg_control, &user_msg.msg_controllen,
2888 			    &user_msg.msg_flags, so);
2889 			if (error) {
2890 				DBG_PRINTF("%s copyout_control() failed\n",
2891 				    __func__);
2892 				goto done;
2893 			}
2894 		}
2895 		/*
2896 		 * Note: the original msg_iovlen and msg_iov do not change
2897 		 */
2898 		if (spacetype == UIO_USERSPACE64) {
2899 			msghdrx64.msg_flags = user_msg.msg_flags;
2900 			msghdrx64.msg_controllen = user_msg.msg_controllen;
2901 			msghdrx64.msg_control = user_msg.msg_control;
2902 			msghdrx64.msg_namelen = user_msg.msg_namelen;
2903 			msghdrx64.msg_name = user_msg.msg_name;
2904 			msghdrx64.msg_datalen = len;
2905 		} else {
2906 			msghdrx32.msg_flags = user_msg.msg_flags;
2907 			msghdrx32.msg_controllen = user_msg.msg_controllen;
2908 			msghdrx32.msg_control = (user32_addr_t) user_msg.msg_control;
2909 			msghdrx32.msg_name = user_msg.msg_namelen;
2910 			msghdrx32.msg_name = (user32_addr_t) user_msg.msg_name;
2911 			msghdrx32.msg_datalen = (user32_size_t) len;
2912 		}
2913 		error = copyout(msghdrxp,
2914 		    (user_addr_t)(src + i * size_of_msghdrx),
2915 		    size_of_msghdrx);
2916 		if (error) {
2917 			DBG_PRINTF("%s copyout() msghdrx failed\n", __func__);
2918 			goto done;
2919 		}
2920 
2921 		m = m->m_nextpkt;
2922 		if (control != NULL) {
2923 			control = control->m_nextpkt;
2924 		}
2925 		if (m_addr != NULL) {
2926 			m_addr = m_addr->m_nextpkt;
2927 		}
2928 	}
2929 
2930 	uap->flags = flags;
2931 
2932 	*retval = (int)i;
2933 done:
2934 	file_drop(uap->s);
2935 
2936 done_no_filedrop:
2937 	if (pkt_list != NULL) {
2938 		m_freem_list(pkt_list);
2939 	}
2940 	if (addr_list != NULL) {
2941 		m_freem_list(addr_list);
2942 	}
2943 	if (ctl_list != NULL) {
2944 		m_freem_list(ctl_list);
2945 	}
2946 	if (auio != NULL) {
2947 		uio_free(auio);
2948 	}
2949 
2950 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2951 
2952 	return error;
2953 }
2954 
2955 /*
2956  * Returns:	0			Success
2957  *		EBADF
2958  *	file_socket:ENOTSOCK
2959  *	file_socket:EBADF
2960  *	soshutdown:EINVAL
2961  *	soshutdown:ENOTCONN
2962  *	soshutdown:EADDRNOTAVAIL[TCP]
2963  *	soshutdown:ENOBUFS[TCP]
2964  *	soshutdown:EMSGSIZE[TCP]
2965  *	soshutdown:EHOSTUNREACH[TCP]
2966  *	soshutdown:ENETUNREACH[TCP]
2967  *	soshutdown:ENETDOWN[TCP]
2968  *	soshutdown:ENOMEM[TCP]
2969  *	soshutdown:EACCES[TCP]
2970  *	soshutdown:EMSGSIZE[TCP]
2971  *	soshutdown:ENOBUFS[TCP]
2972  *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
2973  *	soshutdown:???			[other protocol families]
2974  */
2975 /* ARGSUSED */
2976 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)2977 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
2978     __unused int32_ref_t retval)
2979 {
2980 	socket_ref_t so;
2981 	int error;
2982 
2983 	AUDIT_ARG(fd, uap->s);
2984 	error = file_socket(uap->s, &so);
2985 	if (error) {
2986 		return error;
2987 	}
2988 	if (so == NULL) {
2989 		error = EBADF;
2990 		goto out;
2991 	}
2992 	error =  soshutdown((struct socket *)so, uap->how);
2993 out:
2994 	file_drop(uap->s);
2995 	return error;
2996 }
2997 
2998 /*
2999  * Returns:	0			Success
3000  *		EFAULT
3001  *		EINVAL
3002  *		EACCES			Mandatory Access Control failure
3003  *	file_socket:ENOTSOCK
3004  *	file_socket:EBADF
3005  *	sosetopt:EINVAL
3006  *	sosetopt:ENOPROTOOPT
3007  *	sosetopt:ENOBUFS
3008  *	sosetopt:EDOM
3009  *	sosetopt:EFAULT
3010  *	sosetopt:EOPNOTSUPP[AF_UNIX]
3011  *	sosetopt:???
3012  */
3013 /* ARGSUSED */
3014 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)3015 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
3016     __unused int32_ref_t retval)
3017 {
3018 	socket_ref_t so;
3019 	struct sockopt sopt;
3020 	int error;
3021 
3022 	AUDIT_ARG(fd, uap->s);
3023 	if (uap->val == 0 && uap->valsize != 0) {
3024 		return EFAULT;
3025 	}
3026 	/* No bounds checking on size (it's unsigned) */
3027 
3028 	error = file_socket(uap->s, &so);
3029 	if (error) {
3030 		return error;
3031 	}
3032 
3033 	sopt.sopt_dir = SOPT_SET;
3034 	sopt.sopt_level = uap->level;
3035 	sopt.sopt_name = uap->name;
3036 	sopt.sopt_val = uap->val;
3037 	sopt.sopt_valsize = uap->valsize;
3038 	sopt.sopt_p = p;
3039 
3040 	if (so == NULL) {
3041 		error = EINVAL;
3042 		goto out;
3043 	}
3044 #if CONFIG_MACF_SOCKET_SUBSET
3045 	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
3046 	    &sopt)) != 0) {
3047 		goto out;
3048 	}
3049 #endif /* MAC_SOCKET_SUBSET */
3050 	error = sosetoptlock(so, &sopt, 1);     /* will lock socket */
3051 out:
3052 	file_drop(uap->s);
3053 	return error;
3054 }
3055 
3056 /*
3057  * Returns:	0			Success
3058  *		EINVAL
3059  *		EBADF
3060  *		EACCES			Mandatory Access Control failure
3061  *	copyin:EFAULT
3062  *	copyout:EFAULT
3063  *	file_socket:ENOTSOCK
3064  *	file_socket:EBADF
3065  *	sogetopt:???
3066  */
3067 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)3068 getsockopt(proc_ref_t p, struct getsockopt_args  *uap,
3069     __unused int32_ref_t retval)
3070 {
3071 	int             error;
3072 	socklen_t       valsize;
3073 	struct sockopt  sopt;
3074 	socket_ref_t so;
3075 
3076 	error = file_socket(uap->s, &so);
3077 	if (error) {
3078 		return error;
3079 	}
3080 	if (uap->val) {
3081 		error = copyin(uap->avalsize, (caddr_t)&valsize,
3082 		    sizeof(valsize));
3083 		if (error) {
3084 			goto out;
3085 		}
3086 		/* No bounds checking on size (it's unsigned) */
3087 	} else {
3088 		valsize = 0;
3089 	}
3090 	sopt.sopt_dir = SOPT_GET;
3091 	sopt.sopt_level = uap->level;
3092 	sopt.sopt_name = uap->name;
3093 	sopt.sopt_val = uap->val;
3094 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
3095 	sopt.sopt_p = p;
3096 
3097 	if (so == NULL) {
3098 		error = EBADF;
3099 		goto out;
3100 	}
3101 #if CONFIG_MACF_SOCKET_SUBSET
3102 	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
3103 	    &sopt)) != 0) {
3104 		goto out;
3105 	}
3106 #endif /* MAC_SOCKET_SUBSET */
3107 	error = sogetoptlock((struct socket *)so, &sopt, 1);    /* will lock */
3108 	if (error == 0) {
3109 		valsize = (socklen_t)sopt.sopt_valsize;
3110 		error = copyout((caddr_t)&valsize, uap->avalsize,
3111 		    sizeof(valsize));
3112 	}
3113 out:
3114 	file_drop(uap->s);
3115 	return error;
3116 }
3117 
3118 
3119 /*
3120  * Get socket name.
3121  *
3122  * Returns:	0			Success
3123  *		EBADF
3124  *	file_socket:ENOTSOCK
3125  *	file_socket:EBADF
3126  *	copyin:EFAULT
3127  *	copyout:EFAULT
3128  *	<pru_sockaddr>:ENOBUFS[TCP]
3129  *	<pru_sockaddr>:ECONNRESET[TCP]
3130  *	<pru_sockaddr>:EINVAL[AF_UNIX]
3131  *	<sf_getsockname>:???
3132  */
3133 /* ARGSUSED */
3134 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)3135 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
3136     __unused int32_ref_t retval)
3137 {
3138 	socket_ref_t so;
3139 	sockaddr_ref_t  sa;
3140 	socklen_t len;
3141 	socklen_t sa_len;
3142 	int error;
3143 
3144 	error = file_socket(uap->fdes, &so);
3145 	if (error) {
3146 		return error;
3147 	}
3148 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3149 	if (error) {
3150 		goto out;
3151 	}
3152 	if (so == NULL) {
3153 		error = EBADF;
3154 		goto out;
3155 	}
3156 	sa = 0;
3157 	socket_lock(so, 1);
3158 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
3159 	if (error == 0) {
3160 		error = sflt_getsockname(so, &sa);
3161 		if (error == EJUSTRETURN) {
3162 			error = 0;
3163 		}
3164 	}
3165 	socket_unlock(so, 1);
3166 	if (error) {
3167 		goto bad;
3168 	}
3169 	if (sa == 0) {
3170 		len = 0;
3171 		goto gotnothing;
3172 	}
3173 
3174 	sa_len = sa->sa_len;
3175 	len = MIN(len, sa_len);
3176 	error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->asa, len);
3177 	if (error) {
3178 		goto bad;
3179 	}
3180 	/* return the actual, untruncated address length */
3181 	len = sa_len;
3182 gotnothing:
3183 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3184 bad:
3185 	free_sockaddr(sa);
3186 out:
3187 	file_drop(uap->fdes);
3188 	return error;
3189 }
3190 
3191 /*
3192  * Get name of peer for connected socket.
3193  *
3194  * Returns:	0			Success
3195  *		EBADF
3196  *		EINVAL
3197  *		ENOTCONN
3198  *	file_socket:ENOTSOCK
3199  *	file_socket:EBADF
3200  *	copyin:EFAULT
3201  *	copyout:EFAULT
3202  *	<pru_peeraddr>:???
3203  *	<sf_getpeername>:???
3204  */
3205 /* ARGSUSED */
3206 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)3207 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
3208     __unused int32_ref_t retval)
3209 {
3210 	socket_ref_t so;
3211 	sockaddr_ref_t  sa;
3212 	socklen_t len;
3213 	socklen_t sa_len;
3214 	int error;
3215 
3216 	error = file_socket(uap->fdes, &so);
3217 	if (error) {
3218 		return error;
3219 	}
3220 	if (so == NULL) {
3221 		error = EBADF;
3222 		goto out;
3223 	}
3224 
3225 	socket_lock(so, 1);
3226 
3227 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
3228 	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
3229 		/* the socket has been shutdown, no more getpeername's */
3230 		socket_unlock(so, 1);
3231 		error = EINVAL;
3232 		goto out;
3233 	}
3234 
3235 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
3236 		socket_unlock(so, 1);
3237 		error = ENOTCONN;
3238 		goto out;
3239 	}
3240 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3241 	if (error) {
3242 		socket_unlock(so, 1);
3243 		goto out;
3244 	}
3245 	sa = 0;
3246 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
3247 	if (error == 0) {
3248 		error = sflt_getpeername(so, &sa);
3249 		if (error == EJUSTRETURN) {
3250 			error = 0;
3251 		}
3252 	}
3253 	socket_unlock(so, 1);
3254 	if (error) {
3255 		goto bad;
3256 	}
3257 	if (sa == 0) {
3258 		len = 0;
3259 		goto gotnothing;
3260 	}
3261 	sa_len = sa->sa_len;
3262 	len = MIN(len, sa_len);
3263 	error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->asa, len);
3264 	if (error) {
3265 		goto bad;
3266 	}
3267 	/* return the actual, untruncated address length */
3268 	len = sa_len;
3269 gotnothing:
3270 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3271 bad:
3272 	free_sockaddr(sa);
3273 out:
3274 	file_drop(uap->fdes);
3275 	return error;
3276 }
3277 
3278 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)3279 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
3280 {
3281 	sockaddr_ref_t sa;
3282 	struct mbuf *m;
3283 	int error;
3284 	socklen_t alloc_buflen = buflen;
3285 
3286 	if (buflen > INT_MAX / 2) {
3287 		return EINVAL;
3288 	}
3289 	if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
3290 	    buflen < offsetof(struct sockaddr, sa_data[0]))) {
3291 		return EINVAL;
3292 	}
3293 	if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
3294 		return EINVAL;
3295 	}
3296 
3297 #ifdef __LP64__
3298 	/*
3299 	 * The fd's in the buffer must expand to be pointers, thus we need twice
3300 	 * as much space
3301 	 */
3302 	if (type == MT_CONTROL) {
3303 		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3304 		    sizeof(struct cmsghdr);
3305 	}
3306 #endif
3307 	if (alloc_buflen > MLEN) {
3308 		if (type == MT_SONAME && alloc_buflen <= 112) {
3309 			alloc_buflen = MLEN;    /* unix domain compat. hack */
3310 		} else if (alloc_buflen > MCLBYTES) {
3311 			return EINVAL;
3312 		}
3313 	}
3314 	m = m_get(M_WAIT, type);
3315 	if (m == NULL) {
3316 		return ENOBUFS;
3317 	}
3318 	if (alloc_buflen > MLEN) {
3319 		MCLGET(m, M_WAIT);
3320 		if ((m->m_flags & M_EXT) == 0) {
3321 			m_free(m);
3322 			return ENOBUFS;
3323 		}
3324 	}
3325 	/*
3326 	 * K64: We still copyin the original buflen because it gets expanded
3327 	 * later and we lie about the size of the mbuf because it only affects
3328 	 * unp_* functions
3329 	 */
3330 	m->m_len = buflen;
3331 	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
3332 	if (error) {
3333 		(void) m_free(m);
3334 	} else {
3335 		*mp = m;
3336 		if (type == MT_SONAME) {
3337 			VERIFY(buflen <= SOCK_MAXADDRLEN);
3338 			sa = mtod(m, sockaddr_ref_t);
3339 			sa->sa_len = (__uint8_t)buflen;
3340 		}
3341 	}
3342 	return error;
3343 }
3344 
3345 /*
3346  * Given a user_addr_t of length len, allocate and fill out a *sa.
3347  *
3348  * Returns:	0			Success
3349  *		ENAMETOOLONG		Filename too long
3350  *		EINVAL			Invalid argument
3351  *		ENOMEM			Not enough space
3352  *		copyin:EFAULT		Bad address
3353  */
3354 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3355 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
3356     size_t len, boolean_t translate_unspec)
3357 {
3358 	struct sockaddr *sa;
3359 	int error;
3360 
3361 	if (len > SOCK_MAXADDRLEN) {
3362 		return ENAMETOOLONG;
3363 	}
3364 
3365 	if (len < offsetof(struct sockaddr, sa_data[0])) {
3366 		return EINVAL;
3367 	}
3368 
3369 	sa = alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
3370 
3371 	error = copyin(uaddr, (caddr_t)sa, len);
3372 	if (error) {
3373 		free_sockaddr(sa);
3374 	} else {
3375 		/*
3376 		 * Force sa_family to AF_INET on AF_INET sockets to handle
3377 		 * legacy applications that use AF_UNSPEC (0).  On all other
3378 		 * sockets we leave it unchanged and let the lower layer
3379 		 * handle it.
3380 		 */
3381 		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
3382 		    SOCK_CHECK_DOM(so, PF_INET) &&
3383 		    len == sizeof(struct sockaddr_in)) {
3384 			sa->sa_family = AF_INET;
3385 		}
3386 		VERIFY(len <= SOCK_MAXADDRLEN);
3387 		sa = *&sa;
3388 		sa->sa_len = (__uint8_t)len;
3389 		*namp = sa;
3390 	}
3391 	return error;
3392 }
3393 
3394 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3395 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
3396     user_addr_t uaddr, size_t len, boolean_t translate_unspec)
3397 {
3398 	int error;
3399 
3400 	if (ss == NULL || uaddr == USER_ADDR_NULL ||
3401 	    len < offsetof(struct sockaddr, sa_data[0])) {
3402 		return EINVAL;
3403 	}
3404 
3405 	/*
3406 	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
3407 	 * so the check here is inclusive.
3408 	 */
3409 	if (len > sizeof(*ss)) {
3410 		return ENAMETOOLONG;
3411 	}
3412 
3413 	bzero(ss, sizeof(*ss));
3414 	error = copyin(uaddr, __SA_UTILS_CONV_TO_BYTES(ss), len);
3415 	if (error == 0) {
3416 		/*
3417 		 * Force sa_family to AF_INET on AF_INET sockets to handle
3418 		 * legacy applications that use AF_UNSPEC (0).  On all other
3419 		 * sockets we leave it unchanged and let the lower layer
3420 		 * handle it.
3421 		 */
3422 		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
3423 		    SOCK_CHECK_DOM(so, PF_INET) &&
3424 		    len == sizeof(struct sockaddr_in)) {
3425 			ss->ss_family = AF_INET;
3426 		}
3427 
3428 		ss->ss_len = (__uint8_t)len;
3429 	}
3430 	return error;
3431 }
3432 
3433 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3434 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3435     u_int count, user_msghdr_x_ptr_t dst,
3436     recv_msg_elem_ptr_t recv_msg_array)
3437 {
3438 	int error = 0;
3439 	u_int i;
3440 
3441 	for (i = 0; i < count; i++) {
3442 		struct user_iovec *iovp;
3443 		struct user_msghdr_x *user_msg = dst + i;
3444 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3445 
3446 		if (spacetype == UIO_USERSPACE64) {
3447 			const struct user64_msghdr_x *msghdr64;
3448 
3449 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3450 
3451 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3452 			user_msg->msg_namelen = msghdr64->msg_namelen;
3453 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3454 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3455 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3456 			user_msg->msg_controllen = msghdr64->msg_controllen;
3457 			user_msg->msg_flags = msghdr64->msg_flags;
3458 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3459 		} else {
3460 			const struct user32_msghdr_x *msghdr32;
3461 
3462 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3463 
3464 			user_msg->msg_name = msghdr32->msg_name;
3465 			user_msg->msg_namelen = msghdr32->msg_namelen;
3466 			user_msg->msg_iov = msghdr32->msg_iov;
3467 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3468 			user_msg->msg_control = msghdr32->msg_control;
3469 			user_msg->msg_controllen = msghdr32->msg_controllen;
3470 			user_msg->msg_flags = msghdr32->msg_flags;
3471 			user_msg->msg_datalen = msghdr32->msg_datalen;
3472 		}
3473 
3474 		if (user_msg->msg_iovlen <= 0 ||
3475 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3476 			error = EMSGSIZE;
3477 			goto done;
3478 		}
3479 		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3480 		    spacetype, direction);
3481 		if (recv_msg_elem->uio == NULL) {
3482 			error = ENOMEM;
3483 			goto done;
3484 		}
3485 
3486 		iovp = uio_iovsaddr_user(recv_msg_elem->uio);
3487 		if (iovp == NULL) {
3488 			error = ENOMEM;
3489 			goto done;
3490 		}
3491 		error = copyin_user_iovec_array(user_msg->msg_iov,
3492 		    spacetype, user_msg->msg_iovlen, iovp);
3493 		if (error) {
3494 			goto done;
3495 		}
3496 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3497 
3498 		error = uio_calculateresid_user(recv_msg_elem->uio);
3499 		if (error) {
3500 			goto done;
3501 		}
3502 		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3503 
3504 		if (user_msg->msg_name && user_msg->msg_namelen) {
3505 			recv_msg_elem->which |= SOCK_MSG_SA;
3506 		}
3507 		if (user_msg->msg_control && user_msg->msg_controllen) {
3508 			recv_msg_elem->which |= SOCK_MSG_CONTROL;
3509 		}
3510 	}
3511 done:
3512 
3513 	return error;
3514 }
3515 
3516 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3517 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3518     u_int count, user_msghdr_x_ptr_t src,
3519     recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3520 {
3521 	u_int i;
3522 	u_int retcnt = 0;
3523 	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3524 
3525 	*ret_error = 0;
3526 
3527 	for (i = 0; i < count; i++) {
3528 		struct user_msghdr_x *user_msg = src + i;
3529 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3530 		user_ssize_t len = 0;
3531 		int error;
3532 
3533 		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3534 
3535 		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3536 			retcnt++;
3537 
3538 			if (recv_msg_elem->which & SOCK_MSG_SA) {
3539 				error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3540 				    &user_msg->msg_namelen);
3541 				if (error != 0) {
3542 					*ret_error = error;
3543 					return 0;
3544 				}
3545 			}
3546 			if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3547 				error = copyout_control(p, recv_msg_elem->controlp,
3548 				    user_msg->msg_control, &user_msg->msg_controllen,
3549 				    &recv_msg_elem->flags, so);
3550 				if (error != 0) {
3551 					*ret_error = error;
3552 					return 0;
3553 				}
3554 			}
3555 		}
3556 
3557 		if (spacetype == UIO_USERSPACE64) {
3558 			struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3559 
3560 			msghdr64->msg_namelen = user_msg->msg_namelen;
3561 			msghdr64->msg_controllen = user_msg->msg_controllen;
3562 			msghdr64->msg_flags = recv_msg_elem->flags;
3563 			msghdr64->msg_datalen = len;
3564 		} else {
3565 			struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3566 
3567 			msghdr32->msg_namelen = user_msg->msg_namelen;
3568 			msghdr32->msg_controllen = user_msg->msg_controllen;
3569 			msghdr32->msg_flags = recv_msg_elem->flags;
3570 			msghdr32->msg_datalen = (user32_size_t)len;
3571 		}
3572 	}
3573 	return retcnt;
3574 }
3575 
3576 recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)3577 alloc_recv_msg_array(u_int count)
3578 {
3579 	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3580 }
3581 
3582 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3583 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3584 {
3585 	if (recv_msg_array == NULL) {
3586 		return;
3587 	}
3588 	for (uint32_t i = 0; i < count; i++) {
3589 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3590 
3591 		if (recv_msg_elem->uio != NULL) {
3592 			uio_free(recv_msg_elem->uio);
3593 		}
3594 		free_sockaddr(recv_msg_elem->psa);
3595 		if (recv_msg_elem->controlp != NULL) {
3596 			m_freem(recv_msg_elem->controlp);
3597 		}
3598 	}
3599 	kfree_type(struct recv_msg_elem, count, recv_msg_array);
3600 }
3601 
3602 
3603 /* Extern linkage requires using __counted_by instead of bptr */
3604 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3605 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3606 {
3607 	user_ssize_t len = 0;
3608 	u_int i;
3609 
3610 	for (i = 0; i < count; i++) {
3611 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3612 
3613 		if (recv_msg_elem->uio != NULL) {
3614 			len += uio_resid(recv_msg_elem->uio);
3615 		}
3616 	}
3617 	return len;
3618 }
3619 
3620 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)3621 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3622 {
3623 	user_ssize_t len = 0;
3624 	u_int i;
3625 
3626 	for (i = 0; i < count; i++) {
3627 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3628 
3629 		if (recv_msg_elem->uio != NULL) {
3630 			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3631 
3632 			/*
3633 			 * Sanity check on the validity of the iovec:
3634 			 * no point of going over sb_max
3635 			 */
3636 			if (resid < 0 || (u_int32_t)resid > sb_max) {
3637 				return 0;
3638 			}
3639 
3640 			len += resid;
3641 			if (len < 0 || (u_int32_t)len > sb_max) {
3642 				return 0;
3643 			}
3644 		}
3645 	}
3646 	return 1;
3647 }
3648 
3649 #if SENDFILE
3650 
/* Number of iovec slots in the on-stack uio that sendfile() builds per pass */
#define SFUIOBUFS 64

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Each rounds n up to a whole number of (1 << shift)-byte units.  n is
 * converted to unsigned int before the subtraction, so n must be non-zero:
 * for n == 0 the "- 1" wraps instead of producing 0.
 */
#define HOWMANY_16K(n)  ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n)   ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3663 
3664 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)3665 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3666     mbuf_ref_ref_t m, boolean_t jumbocl)
3667 {
3668 	unsigned int needed;
3669 
3670 	if (pktlen == 0) {
3671 		panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3672 	}
3673 
3674 	/*
3675 	 * Try to allocate for the whole thing.  Since we want full control
3676 	 * over the buffer size and be able to accept partial result, we can't
3677 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
3678 	 */
3679 	*m = NULL;
3680 	if (pktlen > MBIGCLBYTES && jumbocl) {
3681 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3682 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3683 	}
3684 	if (*m == NULL) {
3685 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3686 		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3687 	}
3688 
3689 	/*
3690 	 * Our previous attempt(s) at allocation had failed; the system
3691 	 * may be short on mbufs, and we want to block until they are
3692 	 * available.  This time, ask just for 1 mbuf and don't return
3693 	 * until we get it.
3694 	 */
3695 	if (*m == NULL) {
3696 		needed = 1;
3697 		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3698 	}
3699 	if (*m == NULL) {
3700 		panic("%s: blocking allocation returned NULL", __func__);
3701 	}
3702 
3703 	*maxchunks = needed;
3704 }
3705 
3706 /*
3707  * sendfile(2).
3708  * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3709  *	 struct sf_hdtr *hdtr, int flags)
3710  *
3711  * Send a file specified by 'fd' and starting at 'offset' to a socket
3712  * specified by 's'. Send only '*nbytes' of the file or until EOF if
3713  * *nbytes == 0. Optionally add a header and/or trailer to the socket
3714  * output. If specified, write the total number of bytes sent into *nbytes.
3715  */
3716 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)3717 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
3718 {
3719 	fileproc_ref_t  fp;
3720 	vnode_ref_t  vp;
3721 	socket_ref_t so;
3722 	struct writev_nocancel_args nuap;
3723 	user_ssize_t writev_retval;
3724 	struct user_sf_hdtr user_hdtr;
3725 	struct user32_sf_hdtr user32_hdtr;
3726 	struct user64_sf_hdtr user64_hdtr;
3727 	off_t off, xfsize;
3728 	off_t nbytes = 0, sbytes = 0;
3729 	int error = 0;
3730 	size_t sizeof_hdtr;
3731 	off_t file_size;
3732 	struct vfs_context context = *vfs_context_current();
3733 	bool got_vnode_ref = false;
3734 
3735 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3736 
3737 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3738 	    0, 0, 0, 0);
3739 
3740 	AUDIT_ARG(fd, uap->fd);
3741 	AUDIT_ARG(value32, uap->s);
3742 
3743 	/*
3744 	 * Do argument checking. Must be a regular file in, stream
3745 	 * type and connected socket out, positive offset.
3746 	 */
3747 	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3748 		goto done;
3749 	}
3750 	if ((error = vnode_getwithref(vp))) {
3751 		goto done;
3752 	}
3753 	got_vnode_ref = true;
3754 
3755 	if ((fp->f_flag & FREAD) == 0) {
3756 		error = EBADF;
3757 		goto done1;
3758 	}
3759 	if (vnode_isreg(vp) == 0) {
3760 		error = ENOTSUP;
3761 		goto done1;
3762 	}
3763 	error = file_socket(uap->s, &so);
3764 	if (error) {
3765 		goto done1;
3766 	}
3767 	if (so == NULL) {
3768 		error = EBADF;
3769 		goto done2;
3770 	}
3771 	if (so->so_type != SOCK_STREAM) {
3772 		error = EINVAL;
3773 		goto done2;
3774 	}
3775 	if ((so->so_state & SS_ISCONNECTED) == 0) {
3776 		error = ENOTCONN;
3777 		goto done2;
3778 	}
3779 	if (uap->offset < 0) {
3780 		error = EINVAL;
3781 		goto done2;
3782 	}
3783 	if (uap->nbytes == USER_ADDR_NULL) {
3784 		error = EINVAL;
3785 		goto done2;
3786 	}
3787 	if (uap->flags != 0) {
3788 		error = EINVAL;
3789 		goto done2;
3790 	}
3791 
3792 	context.vc_ucred = fp->fp_glob->fg_cred;
3793 
3794 #if CONFIG_MACF_SOCKET_SUBSET
3795 	/* JMM - fetch connected sockaddr? */
3796 	error = mac_socket_check_send(context.vc_ucred, so, NULL);
3797 	if (error) {
3798 		goto done2;
3799 	}
3800 #endif
3801 
3802 	/*
3803 	 * Get number of bytes to send
3804 	 * Should it applies to size of header and trailer?
3805 	 */
3806 	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3807 	if (error) {
3808 		goto done2;
3809 	}
3810 
3811 	/*
3812 	 * If specified, get the pointer to the sf_hdtr struct for
3813 	 * any headers/trailers.
3814 	 */
3815 	if (uap->hdtr != USER_ADDR_NULL) {
3816 		caddr_t hdtrp;
3817 
3818 		bzero(&user_hdtr, sizeof(user_hdtr));
3819 		if (is_p_64bit_process) {
3820 			hdtrp = (caddr_t)&user64_hdtr;
3821 			sizeof_hdtr = sizeof(user64_hdtr);
3822 		} else {
3823 			hdtrp = (caddr_t)&user32_hdtr;
3824 			sizeof_hdtr = sizeof(user32_hdtr);
3825 		}
3826 		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3827 		if (error) {
3828 			goto done2;
3829 		}
3830 		if (is_p_64bit_process) {
3831 			user_hdtr.headers = user64_hdtr.headers;
3832 			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3833 			user_hdtr.trailers = user64_hdtr.trailers;
3834 			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3835 		} else {
3836 			user_hdtr.headers = user32_hdtr.headers;
3837 			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3838 			user_hdtr.trailers = user32_hdtr.trailers;
3839 			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3840 		}
3841 
3842 		/*
3843 		 * Send any headers. Wimp out and use writev(2).
3844 		 */
3845 		if (user_hdtr.headers != USER_ADDR_NULL) {
3846 			bzero(&nuap, sizeof(struct writev_args));
3847 			nuap.fd = uap->s;
3848 			nuap.iovp = user_hdtr.headers;
3849 			nuap.iovcnt = user_hdtr.hdr_cnt;
3850 			error = writev_nocancel(p, &nuap, &writev_retval);
3851 			if (error) {
3852 				goto done2;
3853 			}
3854 			sbytes += writev_retval;
3855 		}
3856 	}
3857 
3858 	/*
3859 	 * Get the file size for 2 reasons:
3860 	 *  1. We don't want to allocate more mbufs than necessary
3861 	 *  2. We don't want to read past the end of file
3862 	 */
3863 	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3864 		goto done2;
3865 	}
3866 
3867 	/*
3868 	 * Simply read file data into a chain of mbufs that used with scatter
3869 	 * gather reads. We're not (yet?) setup to use zero copy external
3870 	 * mbufs that point to the file pages.
3871 	 */
3872 	socket_lock(so, 1);
3873 	error = sblock(&so->so_snd, SBL_WAIT);
3874 	if (error) {
3875 		socket_unlock(so, 1);
3876 		goto done2;
3877 	}
3878 	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3879 		mbuf_ref_t m0 = NULL;
3880 		mbuf_t  m;
3881 		unsigned int    nbufs = SFUIOBUFS, i;
3882 		uio_t   auio;
3883 		UIO_STACKBUF(uio_buf, SFUIOBUFS);               /* 1KB !!! */
3884 		size_t  uiolen;
3885 		user_ssize_t    rlen;
3886 		off_t   pgoff;
3887 		size_t  pktlen;
3888 		boolean_t jumbocl;
3889 
3890 		/*
3891 		 * Calculate the amount to transfer.
3892 		 * Align to round number of pages.
3893 		 * Not to exceed send socket buffer,
3894 		 * the EOF, or the passed in nbytes.
3895 		 */
3896 		xfsize = sbspace(&so->so_snd);
3897 
3898 		if (xfsize <= 0) {
3899 			if (so->so_state & SS_CANTSENDMORE) {
3900 				error = EPIPE;
3901 				goto done3;
3902 			} else if ((so->so_state & SS_NBIO)) {
3903 				error = EAGAIN;
3904 				goto done3;
3905 			} else {
3906 				xfsize = PAGE_SIZE;
3907 			}
3908 		}
3909 
3910 		if (xfsize > SENDFILE_MAX_BYTES) {
3911 			xfsize = SENDFILE_MAX_BYTES;
3912 		} else if (xfsize > PAGE_SIZE) {
3913 			xfsize = trunc_page(xfsize);
3914 		}
3915 		pgoff = off & PAGE_MASK_64;
3916 		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3917 			xfsize = PAGE_SIZE_64 - pgoff;
3918 		}
3919 		if (nbytes && xfsize > (nbytes - sbytes)) {
3920 			xfsize = nbytes - sbytes;
3921 		}
3922 		if (xfsize <= 0) {
3923 			break;
3924 		}
3925 		if (off + xfsize > file_size) {
3926 			xfsize = file_size - off;
3927 		}
3928 		if (xfsize <= 0) {
3929 			break;
3930 		}
3931 
3932 		/*
3933 		 * Attempt to use larger than system page-size clusters for
3934 		 * large writes only if there is a jumbo cluster pool and
3935 		 * if the socket is marked accordingly.
3936 		 */
3937 		jumbocl = sosendjcl && njcl > 0 &&
3938 		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3939 
3940 		socket_unlock(so, 0);
3941 		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3942 		pktlen = mbuf_pkthdr_maxlen(m0);
3943 		if (pktlen < (size_t)xfsize) {
3944 			xfsize = pktlen;
3945 		}
3946 
3947 		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3948 		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
3949 		if (auio == NULL) {
3950 			DBG_PRINTF("sendfile failed. nbufs = %d. %s", nbufs,
3951 			    "File a radar related to rdar://10146739.\n");
3952 			mbuf_freem(m0);
3953 			error = ENXIO;
3954 			socket_lock(so, 0);
3955 			goto done3;
3956 		}
3957 
3958 		for (i = 0, m = m0, uiolen = 0;
3959 		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3960 		    i++, m = mbuf_next(m)) {
3961 			size_t mlen = mbuf_maxlen(m);
3962 
3963 			if (mlen + uiolen > (size_t)xfsize) {
3964 				mlen = xfsize - uiolen;
3965 			}
3966 			mbuf_setlen(m, mlen);
3967 			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3968 			    mlen);
3969 			uiolen += mlen;
3970 		}
3971 
3972 		if (xfsize != uio_resid(auio)) {
3973 			DBG_PRINTF("sendfile: xfsize: %lld != uio_resid(auio): "
3974 			    "%lld\n", xfsize, (long long)uio_resid(auio));
3975 		}
3976 
3977 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3978 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3979 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3980 		error = fo_read(fp, auio, FOF_OFFSET, &context);
3981 		socket_lock(so, 0);
3982 		if (error != 0) {
3983 			if (uio_resid(auio) != xfsize && (error == ERESTART ||
3984 			    error == EINTR || error == EWOULDBLOCK)) {
3985 				error = 0;
3986 			} else {
3987 				mbuf_freem(m0);
3988 				goto done3;
3989 			}
3990 		}
3991 		xfsize -= uio_resid(auio);
3992 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3993 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3994 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3995 
3996 		if (xfsize == 0) {
3997 			break;
3998 		}
3999 		if (xfsize + off > file_size) {
4000 			DBG_PRINTF("sendfile: xfsize: %lld + off: %lld > file_size:"
4001 			    "%lld\n", xfsize, off, file_size);
4002 		}
4003 		for (i = 0, m = m0, rlen = 0;
4004 		    i < nbufs && m != NULL && rlen < xfsize;
4005 		    i++, m = mbuf_next(m)) {
4006 			size_t mlen = mbuf_maxlen(m);
4007 
4008 			if (rlen + mlen > (size_t)xfsize) {
4009 				mlen = xfsize - rlen;
4010 			}
4011 			mbuf_setlen(m, mlen);
4012 
4013 			rlen += mlen;
4014 		}
4015 		mbuf_pkthdr_setlen(m0, xfsize);
4016 
4017 retry_space:
4018 		/*
4019 		 * Make sure that the socket is still able to take more data.
4020 		 * CANTSENDMORE being true usually means that the connection
4021 		 * was closed. so_error is true when an error was sensed after
4022 		 * a previous send.
4023 		 * The state is checked after the page mapping and buffer
4024 		 * allocation above since those operations may block and make
4025 		 * any socket checks stale. From this point forward, nothing
4026 		 * blocks before the pru_send (or more accurately, any blocking
4027 		 * results in a loop back to here to re-check).
4028 		 */
4029 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
4030 			if (so->so_state & SS_CANTSENDMORE) {
4031 				error = EPIPE;
4032 			} else {
4033 				error = so->so_error;
4034 				so->so_error = 0;
4035 			}
4036 			m_freem(m0);
4037 			goto done3;
4038 		}
4039 		/*
4040 		 * Wait for socket space to become available. We do this just
4041 		 * after checking the connection state above in order to avoid
4042 		 * a race condition with sbwait().
4043 		 */
4044 		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
4045 			if (so->so_state & SS_NBIO) {
4046 				m_freem(m0);
4047 				error = EAGAIN;
4048 				goto done3;
4049 			}
4050 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4051 			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
4052 			error = sbwait(&so->so_snd);
4053 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4054 			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
4055 			/*
4056 			 * An error from sbwait usually indicates that we've
4057 			 * been interrupted by a signal. If we've sent anything
4058 			 * then return bytes sent, otherwise return the error.
4059 			 */
4060 			if (error) {
4061 				m_freem(m0);
4062 				goto done3;
4063 			}
4064 			goto retry_space;
4065 		}
4066 
4067 		mbuf_ref_t  control = NULL;
4068 		{
4069 			/*
4070 			 * Socket filter processing
4071 			 */
4072 
4073 			error = sflt_data_out(so, NULL, &m0, &control, 0);
4074 			if (error) {
4075 				if (error == EJUSTRETURN) {
4076 					error = 0;
4077 					continue;
4078 				}
4079 				goto done3;
4080 			}
4081 			/*
4082 			 * End Socket filter processing
4083 			 */
4084 		}
4085 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4086 		    uap->s, 0, 0, 0, 0);
4087 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
4088 		    NULL, control, p);
4089 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4090 		    uap->s, 0, 0, 0, 0);
4091 		if (error) {
4092 			goto done3;
4093 		}
4094 	}
4095 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
4096 	/*
4097 	 * Send trailers. Wimp out and use writev(2).
4098 	 */
4099 	if (uap->hdtr != USER_ADDR_NULL &&
4100 	    user_hdtr.trailers != USER_ADDR_NULL) {
4101 		bzero(&nuap, sizeof(struct writev_args));
4102 		nuap.fd = uap->s;
4103 		nuap.iovp = user_hdtr.trailers;
4104 		nuap.iovcnt = user_hdtr.trl_cnt;
4105 		error = writev_nocancel(p, &nuap, &writev_retval);
4106 		if (error) {
4107 			goto done2;
4108 		}
4109 		sbytes += writev_retval;
4110 	}
4111 done2:
4112 	file_drop(uap->s);
4113 done1:
4114 	file_drop(uap->fd);
4115 done:
4116 	if (got_vnode_ref) {
4117 		vnode_put(vp);
4118 	}
4119 	if (uap->nbytes != USER_ADDR_NULL) {
4120 		/* XXX this appears bogus for some early failure conditions */
4121 		copyout(&sbytes, uap->nbytes, sizeof(off_t));
4122 	}
4123 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
4124 	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
4125 	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
4126 	return error;
4127 done3:
4128 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
4129 	goto done2;
4130 }
4131 
4132 
4133 #endif /* SENDFILE */
4134