xref: /xnu-10002.61.3/bsd/kern/uipc_syscalls.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * sendfile(2) and related extensions:
33  * Copyright (c) 1998, David Greenman. All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64  */
65 /*
66  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67  * support for mandatory and extensible security protections.  This notice
68  * is included in support of clause 2.2 (b) of the Apple Public License,
69  * Version 2.0.
70  */
71 
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 #include <sys/types.h>
96 
97 #include <security/audit/audit.h>
98 
99 #include <sys/kdebug.h>
100 #include <sys/sysproto.h>
101 #include <netinet/in.h>
102 #include <net/route.h>
103 #include <netinet/in_pcb.h>
104 
105 #include <os/log.h>
106 #include <os/ptrtools.h>
107 
108 #include <os/log.h>
109 
110 #if CONFIG_MACF_SOCKET_SUBSET
111 #include <security/mac_framework.h>
112 #endif /* MAC_SOCKET_SUBSET */
113 
/*
 * Member shorthands: with these in effect, fp->f_flag and fp->f_ops expand
 * to fp->fp_glob->fg_flag and fp->fp_glob->fg_ops respectively.
 */
114 #define f_flag fp_glob->fg_flag
115 #define f_ops fp_glob->fg_ops
116 
/*
 * Trace codes used by this file, all built with NETDBG_CODE() in the
 * DBG_NETSOCK class.  The DBG_FNC_* values keep a per-function number in
 * bits 8 and up; the low bits separate sub-events of one function (see the
 * four DBG_FNC_SENDFILE* codes).
 */
117 #define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
118 #define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
119 #define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
120 #define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
121 #define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
122 #define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
123 #define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
124 #define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
125 #define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
126 #define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
127 #define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
128 #define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
129 #define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
130 #define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
131 #define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
132 #define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))
133 
134 /* Forward declarations for referenced types */
/*
 * Each line names a base type, a short name and a pointer kind (__CCT_PTR or
 * __CCT_REF).  The prototypes in this file use the resulting typedef names,
 * e.g. void_ptr_t, int32_ref_t, socklen_ref_t.
 * NOTE(review): the exact expansion of __CCT_DECLARE_CONSTRAINED_PTR_TYPE is
 * not visible in this file -- confirm against its defining header.
 */
135 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
136 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
137 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
138 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
139 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
140 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
141 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
142 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
143 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
144 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
145 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
146 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
147 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
148 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
149 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
150 
/*
 * Prototypes for the file-local (static) helpers behind the system calls
 * in this file.
 */

/* Common send/receive workers. */
151 static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
152     int, int32_ref_t );
153 static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
154     int32_ref_t);
/* Connection and user-address helpers. */
155 static int connectit(socket_ref_t, sockaddr_ref_t);
156 static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
157     size_t, boolean_t);
158 static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
159     user_addr_t, size_t, boolean_t);
160 #if SENDFILE
161 static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
162     boolean_t);
163 #endif /* SENDFILE */
164 static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
165 static int connectitx(socket_ref_t, sockaddr_ref_t,
166     sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
167     sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
168 static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
169     int_ref_t);
/* Shared worker for socket() and socket_delegate(); the last int is the delegate flag. */
170 static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);
171 
/* user_msghdr_x / uio array helpers, declared only for DEBUG or DEVELOPMENT builds. */
172 #if DEBUG || DEVELOPMENT
173 static int internalize_user_msghdr_array(const void_ptr_t, int, int,
174     u_int count, user_msghdr_x_ptr_t, uio_ref_ptr_t);
175 
176 static void externalize_user_msghdr_array(void_ptr_t, int, int, u_int count,
177     const user_msghdr_x_ptr_t, uio_ref_ptr_t);
178 
179 static void free_uio_array(uio_ref_ptr_t, u_int count);
180 static boolean_t uio_array_is_valid(uio_ref_ptr_t, u_int count);
181 #endif /* DEBUG || DEVELOPMENT */
/* recv_msg_elem array helpers. */
182 static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
183     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
184 static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
185     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);
186 
187 static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
188 static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
189 static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
190 static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
191     socklen_ref_t, int_ref_t, socket_ref_t);
192 
/* The variables below are published as sysctl nodes under _kern_ipc. */
193 SYSCTL_DECL(_kern_ipc);
194 
/* Initial value of both somaxsendmsgx and somaxrecvmsgx. */
195 #define SO_MAX_MSG_X_DEFAULT 256
196 
/*
 * maxsendmsgx: read-write.
 * NOTE(review): presumably the per-call message cap for sendmsg_x(); the
 * code that reads it is outside this excerpt -- confirm.
 */
197 static u_int somaxsendmsgx = SO_MAX_MSG_X_DEFAULT;
198 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
199     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
200 
/*
 * maxrecvmsgx: read-write.
 * NOTE(review): presumably the receive-side counterpart for recvmsg_x() --
 * confirm.
 */
201 static u_int somaxrecvmsgx = SO_MAX_MSG_X_DEFAULT;
202 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
203     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
204 
/* missingpktinfo: exported read-only (CTLFLAG_RD); starts at 0. */
205 static u_int missingpktinfo = 0;
206 SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
207     CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");
208 
/* do_recvmsg_x_donttrunc: read-write switch, off (0) by default. */
209 static int do_recvmsg_x_donttrunc = 0;
210 SYSCTL_INT(_kern_ipc, OID_AUTO, do_recvmsg_x_donttrunc,
211     CTLFLAG_RW | CTLFLAG_LOCKED, &do_recvmsg_x_donttrunc, 0, "");
212 
#if DEBUG || DEVELOPMENT
/*
 * debug sysctl under _kern_ipc (read-write): when non-zero, DBG_PRINTF()
 * forwards its arguments to os_log(); when zero, DBG_PRINTF() does nothing.
 */
static int uipc_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &uipc_debug, 0, "");

/* DEBUG/DEVELOPMENT builds pass kernel addresses through unchanged. */
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
/*
 * Wrapped in do { } while (0) so the macro is a single statement: it can be
 * used as the body of an unbraced if/else without capturing the else, and it
 * takes a trailing semicolon exactly like the non-debug variant below.
 */
#define DBG_PRINTF(...) do {                            \
	if (uipc_debug != 0) {                          \
	        os_log(OS_LOG_DEFAULT, __VA_ARGS__);    \
	}                                               \
} while (0)
#else
/* Other builds route addresses through VM_KERNEL_ADDRPERM() and compile logging out. */
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif /* DEBUG || DEVELOPMENT */
226 
227 
228 /*
229  * Values for sendmsg_x_mode
230  * 0: default
231  * 1: sendit loop one at a time
232  * 2: old implementation
233  */
/* Exported read-write as sendmsg_x_mode under _kern_ipc; starts at 0 (default). */
234 static u_int sendmsg_x_mode = 0;
235 SYSCTL_UINT(_kern_ipc, OID_AUTO, sendmsg_x_mode,
236     CTLFLAG_RW | CTLFLAG_LOCKED, &sendmsg_x_mode, 0, "");
237 
238 /*
239  * System call interface to the socket abstraction.
240  */
241 
/*
 * File operations vector, defined outside this file.  socket_common() and
 * accept_nocancel() store its address in fp->f_ops of the fileproc they
 * allocate.
 */
242 extern const struct fileops socketops;
243 
244 /*
245  * Returns:	0			Success
246  *		EACCES			Mandatory Access Control failure
247  *	falloc:ENFILE
248  *	falloc:EMFILE
249  *	falloc:ENOMEM
250  *	socreate:EAFNOSUPPORT
251  *	socreate:EPROTOTYPE
252  *	socreate:EPROTONOSUPPORT
253  *	socreate:ENOBUFS
254  *	socreate:ENOMEM
255  *	socreate:???			[other protocol families, IPSEC]
256  */
257 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)258 socket(proc_ref_t p,
259     struct socket_args *uap,
260     int32_ref_t retval)
261 {
262 	return socket_common(p, uap->domain, uap->type, uap->protocol,
263 	           proc_selfpid(), retval, 0);
264 }
265 
266 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)267 socket_delegate(proc_ref_t p,
268     struct socket_delegate_args *uap,
269     int32_ref_t retval)
270 {
271 	return socket_common(p, uap->domain, uap->type, uap->protocol,
272 	           uap->epid, retval, 1);
273 }
274 
275 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)276 socket_common(proc_ref_t p,
277     int domain,
278     int type,
279     int protocol,
280     pid_t epid,
281     int32_ref_t retval,
282     int delegate)
283 {
284 	socket_ref_t so;
285 	fileproc_ref_t  fp;
286 	int fd, error;
287 
288 	AUDIT_ARG(socket, domain, type, protocol);
289 #if CONFIG_MACF_SOCKET_SUBSET
290 	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
291 	    type, protocol)) != 0) {
292 		return error;
293 	}
294 #endif /* MAC_SOCKET_SUBSET */
295 
296 	if (delegate) {
297 		error = priv_check_cred(kauth_cred_get(),
298 		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
299 		if (error) {
300 			return EACCES;
301 		}
302 	}
303 
304 	error = falloc(p, &fp, &fd, vfs_context_current());
305 	if (error) {
306 		return error;
307 	}
308 	fp->f_flag = FREAD | FWRITE;
309 	fp->f_ops = &socketops;
310 
311 	if (delegate) {
312 		error = socreate_delegate(domain, &so, type, protocol, epid);
313 	} else {
314 		error = socreate(domain, &so, type, protocol);
315 	}
316 
317 	if (error) {
318 		fp_free(p, fd, fp);
319 	} else {
320 		fp_set_data(fp, so);
321 
322 		proc_fdlock(p);
323 		procfdtbl_releasefd(p, fd, NULL);
324 
325 		if (ENTR_SHOULDTRACE) {
326 			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
327 			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
328 		}
329 		fp_drop(p, fd, fp, 1);
330 		proc_fdunlock(p);
331 
332 		*retval = fd;
333 	}
334 	return error;
335 }
336 
337 /*
338  * Returns:	0			Success
339  *		EDESTADDRREQ		Destination address required
340  *		EBADF			Bad file descriptor
341  *		EACCES			Mandatory Access Control failure
342  *	file_socket:ENOTSOCK
343  *	file_socket:EBADF
344  *	getsockaddr:ENAMETOOLONG	Filename too long
345  *	getsockaddr:EINVAL		Invalid argument
346  *	getsockaddr:ENOMEM		Not enough space
347  *	getsockaddr:EFAULT		Bad address
348  *	sobindlock:???
349  */
350 /* ARGSUSED */
351 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)352 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
353 {
354 	struct sockaddr_storage ss;
355 	sockaddr_ref_t  sa = NULL;
356 	socket_ref_t so;
357 	boolean_t want_free = TRUE;
358 	int error;
359 
360 	AUDIT_ARG(fd, uap->s);
361 	error = file_socket(uap->s, &so);
362 	if (error != 0) {
363 		return error;
364 	}
365 	if (so == NULL) {
366 		error = EBADF;
367 		goto out;
368 	}
369 	if (uap->name == USER_ADDR_NULL) {
370 		error = EDESTADDRREQ;
371 		goto out;
372 	}
373 	if (uap->namelen > sizeof(ss)) {
374 		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
375 	} else {
376 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
377 		if (error == 0) {
378 			sa = (sockaddr_ref_t)&ss;
379 			want_free = FALSE;
380 		}
381 	}
382 	if (error != 0) {
383 		goto out;
384 	}
385 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
386 #if CONFIG_MACF_SOCKET_SUBSET
387 	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
388 	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
389 		error = sobindlock(so, sa, 1);  /* will lock socket */
390 	}
391 #else
392 	error = sobindlock(so, sa, 1);          /* will lock socket */
393 #endif /* MAC_SOCKET_SUBSET */
394 	if (want_free) {
395 		free_sockaddr(sa);
396 	}
397 out:
398 	file_drop(uap->s);
399 	return error;
400 }
401 
402 /*
403  * Returns:	0			Success
404  *		EBADF
405  *		EACCES			Mandatory Access Control failure
406  *	file_socket:ENOTSOCK
407  *	file_socket:EBADF
408  *	solisten:EINVAL
409  *	solisten:EOPNOTSUPP
410  *	solisten:???
411  */
412 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)413 listen(__unused proc_ref_t p, struct listen_args *uap,
414     __unused int32_ref_t retval)
415 {
416 	int error;
417 	socket_ref_t so;
418 
419 	AUDIT_ARG(fd, uap->s);
420 	error = file_socket(uap->s, &so);
421 	if (error) {
422 		return error;
423 	}
424 	if (so != NULL)
425 #if CONFIG_MACF_SOCKET_SUBSET
426 	{
427 		error = mac_socket_check_listen(kauth_cred_get(), so);
428 		if (error == 0) {
429 			error = solisten(so, uap->backlog);
430 		}
431 	}
432 #else
433 	{ error = solisten(so, uap->backlog);}
434 #endif /* MAC_SOCKET_SUBSET */
435 	else {
436 		error = EBADF;
437 	}
438 
439 	file_drop(uap->s);
440 	return error;
441 }
442 
/*
 * accept_nocancel: accept() without the __pthread_testcancel() call.
 *
 * Takes the first completed connection off the listening socket's so_comp
 * queue (sleeping for one unless the socket is marked SS_NBIO), installs it
 * under a newly allocated descriptor that copies the listening descriptor's
 * file flags, and, when uap->name is set, copies the address returned by
 * soacceptlock() out to the caller.
 *
 * p       process used for the descriptor lookup and allocation
 * uap     s: listening descriptor; name: user address buffer (may be null);
 *         anamelen: user socklen_t, buffer size on entry, address length
 *         on return
 * retval  preset to -1; set to the new descriptor once falloc() succeeds
 */
443 /*
444  * Returns:	fp_get_ftype:EBADF	Bad file descriptor
445  *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
446  *		:EFAULT			Bad address on copyin/copyout
447  *		:EBADF			Bad file descriptor
448  *		:EOPNOTSUPP		Operation not supported on socket
449  *		:EINVAL			Invalid argument
450  *		:EWOULDBLOCK		Operation would block
451  *		:ECONNABORTED		Connection aborted
452  *		:EINTR			Interrupted function
453  *		:EACCES			Mandatory Access Control failure
454  *		falloc:ENFILE		Too many files open in system
455  *		falloc:EMFILE		Too many open files
456  *		falloc:ENOMEM		Not enough space
457  *		0			Success
458  */
459 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)460 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
461     int32_ref_t retval)
462 {
463 	fileproc_ref_t  fp;
464 	sockaddr_ref_t  sa = NULL;
465 	socklen_t namelen;
466 	int error;
467 	socket_ref_t  head;
468 	socket_ref_t so = NULL;
469 	lck_mtx_t *mutex_held;
470 	int fd = uap->s;
471 	int newfd;
472 	unsigned int fflag;
473 	int dosocklock = 0;
474 
475 	*retval = -1;
476 
477 	AUDIT_ARG(fd, uap->s);
478 
	/* The caller's buffer size is only read when an address was requested */
479 	if (uap->name) {
480 		error = copyin(uap->anamelen, (caddr_t)&namelen,
481 		    sizeof(socklen_t));
482 		if (error) {
483 			return error;
484 		}
485 	}
486 	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
487 	if (error) {
488 		return error;
489 	}
490 	head = (struct socket *)fp_get_data(fp);
491 
492 #if CONFIG_MACF_SOCKET_SUBSET
493 	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
494 		goto out;
495 	}
496 #endif /* MAC_SOCKET_SUBSET */
497 
498 	socket_lock(head, 1);
499 
	/*
	 * Choose the mutex handed to msleep() below: the one returned by the
	 * protocol's pr_getlock when it has one, otherwise the domain mutex.
	 * dosocklock records the first case; the new socket is then locked
	 * separately further down.
	 */
500 	if (head->so_proto->pr_getlock != NULL) {
501 		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
502 		dosocklock = 1;
503 	} else {
504 		mutex_held = head->so_proto->pr_domain->dom_mtx;
505 		dosocklock = 0;
506 	}
507 
508 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
509 		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
510 			error = EOPNOTSUPP;
511 		} else {
512 			/* POSIX: The socket is not accepting connections */
513 			error = EINVAL;
514 		}
515 		socket_unlock(head, 1);
516 		goto out;
517 	}
518 check_again:
519 	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
520 		socket_unlock(head, 1);
521 		error = EWOULDBLOCK;
522 		goto out;
523 	}
	/* Sleep until a connection is queued or an error is posted on head */
524 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
525 		if (head->so_state & SS_CANTRCVMORE) {
526 			head->so_error = ECONNABORTED;
527 			break;
528 		}
529 		if (head->so_usecount < 1) {
530 			panic("accept: head=%p refcount=%d", head,
531 			    head->so_usecount);
532 		}
533 		error = msleep((caddr_t)&head->so_timeo, mutex_held,
534 		    PSOCK | PCATCH, "accept", 0);
535 		if (head->so_usecount < 1) {
536 			panic("accept: 2 head=%p refcount=%d", head,
537 			    head->so_usecount);
538 		}
539 		if ((head->so_state & SS_DRAINING)) {
540 			error = ECONNABORTED;
541 		}
542 		if (error) {
543 			socket_unlock(head, 1);
544 			goto out;
545 		}
546 	}
	/* A pending so_error is returned once and then cleared */
547 	if (head->so_error) {
548 		error = head->so_error;
549 		head->so_error = 0;
550 		socket_unlock(head, 1);
551 		goto out;
552 	}
553 
554 	/*
555 	 * At this point we know that there is at least one connection
556 	 * ready to be accepted. Remove it from the queue prior to
557 	 * allocating the file descriptor for it since falloc() may
558 	 * block allowing another process to accept the connection
559 	 * instead.
560 	 */
561 	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
562 
563 	so_acquire_accept_list(head, NULL);
	/* The queue may have been emptied in the meantime; start over if so */
564 	if (TAILQ_EMPTY(&head->so_comp)) {
565 		so_release_accept_list(head);
566 		goto check_again;
567 	}
568 
569 	so = TAILQ_FIRST(&head->so_comp);
570 	TAILQ_REMOVE(&head->so_comp, so, so_list);
571 	/*
572 	 * Acquire the lock of the new connection
573 	 * as we may be in the process of receiving
574 	 * a packet that may change its so_state
575 	 * (e.g.: a TCP FIN).
576 	 */
577 	if (dosocklock) {
578 		socket_lock(so, 0);
579 	}
580 	so->so_head = NULL;
581 	so->so_state &= ~SS_COMP;
582 	if (dosocklock) {
583 		socket_unlock(so, 0);
584 	}
585 	head->so_qlen--;
586 	so_release_accept_list(head);
587 
588 	/* unlock head to avoid deadlock with select, keep a ref on head */
589 	socket_unlock(head, 0);
590 
591 #if CONFIG_MACF_SOCKET_SUBSET
592 	/*
593 	 * Pass the pre-accepted socket to the MAC framework. This is
594 	 * cheaper than allocating a file descriptor for the socket,
595 	 * calling the protocol accept callback, and possibly freeing
596 	 * the file descriptor should the MAC check fail.
597 	 */
598 	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
599 		socket_lock(so, 1);
600 		so->so_state &= ~SS_NOFDREF;
601 		socket_unlock(so, 1);
602 		soclose(so);
603 		/* Drop reference on listening socket */
604 		sodereference(head);
605 		goto out;
606 	}
607 #endif /* MAC_SOCKET_SUBSET */
608 
609 	/*
610 	 * Pass the pre-accepted socket to any interested socket filter(s).
611 	 * Upon failure, the socket would have been closed by the callee.
612 	 */
613 	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
614 		/* Drop reference on listening socket */
615 		sodereference(head);
616 		/* Propagate socket filter's error code to the caller */
617 		goto out;
618 	}
619 
	/*
	 * Save the listening descriptor's flags now: falloc() below stores the
	 * new fileproc through &fp, so fp no longer refers to the listener
	 * afterwards.
	 */
620 	fflag = fp->f_flag;
621 	error = falloc(p, &fp, &newfd, vfs_context_current());
622 	if (error) {
623 		/*
624 		 * Probably ran out of file descriptors.
625 		 *
626 		 * <rdar://problem/8554930>
627 		 * Don't put this back on the socket like we used to, that
628 		 * just causes the client to spin. Drop the socket.
629 		 */
630 		socket_lock(so, 1);
631 		so->so_state &= ~SS_NOFDREF;
632 		socket_unlock(so, 1);
633 		soclose(so);
634 		sodereference(head);
635 		goto out;
636 	}
637 	*retval = newfd;
638 	fp->f_flag = fflag;
639 	fp->f_ops = &socketops;
640 	fp_set_data(fp, so);
641 
642 	socket_lock(head, 0);
643 	if (dosocklock) {
644 		socket_lock(so, 1);
645 	}
646 
647 	/* Sync socket non-blocking/async state with file flags */
648 	if (fp->f_flag & FNONBLOCK) {
649 		so->so_state |= SS_NBIO;
650 	} else {
651 		so->so_state &= ~SS_NBIO;
652 	}
653 
654 	if (fp->f_flag & FASYNC) {
655 		so->so_state |= SS_ASYNC;
656 		so->so_rcv.sb_flags |= SB_ASYNC;
657 		so->so_snd.sb_flags |= SB_ASYNC;
658 	} else {
659 		so->so_state &= ~SS_ASYNC;
660 		so->so_rcv.sb_flags &= ~SB_ASYNC;
661 		so->so_snd.sb_flags &= ~SB_ASYNC;
662 	}
663 
	/* The return value of soacceptlock() is deliberately ignored; only sa is examined */
664 	(void) soacceptlock(so, &sa, 0);
665 	socket_unlock(head, 1);
666 	if (sa == NULL) {
		/* No address available: report length 0 if the caller asked for one */
667 		namelen = 0;
668 		if (uap->name) {
669 			goto gotnoname;
670 		}
671 		error = 0;
672 		goto releasefd;
673 	}
674 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
675 
676 	if (uap->name) {
677 		socklen_t       sa_len;
678 
679 		/* save sa_len before it is destroyed */
680 		sa_len = sa->sa_len;
681 		namelen = MIN(namelen, sa_len);
682 		error = copyout(sa, uap->name, namelen);
683 		if (!error) {
684 			/* return the actual, untruncated address length */
685 			namelen = sa_len;
686 		}
		/*
		 * NOTE(review): the status of the address copyout above is
		 * overwritten by the length copyout below, so a failed address
		 * copy is reported only if writing the length fails too.
		 */
687 gotnoname:
688 		error = copyout((caddr_t)&namelen, uap->anamelen,
689 		    sizeof(socklen_t));
690 	}
691 	free_sockaddr(sa);
692 
693 releasefd:
694 	/*
695 	 * If the socket has been marked as inactive by sosetdefunct(),
696 	 * disallow further operations on it.
697 	 */
698 	if (so->so_flags & SOF_DEFUNCT) {
699 		sodefunct(current_proc(), so,
700 		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
701 	}
702 
703 	if (dosocklock) {
704 		socket_unlock(so, 1);
705 	}
706 
707 	proc_fdlock(p);
708 	procfdtbl_releasefd(p, newfd, NULL);
709 	fp_drop(p, newfd, fp, 1);
710 	proc_fdunlock(p);
711 
	/*
	 * Common exit.  Every path that reaches it has already released the
	 * socket locks it took.  newfd is only read below when error is 0.
	 */
712 out:
713 	if (error == 0 && ENTR_SHOULDTRACE) {
714 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
715 		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
716 	}
717 
	/* presumably releases the reference taken by fp_get_ftype() -- confirm */
718 	file_drop(fd);
719 	return error;
720 }
721 
722 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)723 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
724 {
725 	__pthread_testcancel(1);
726 	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
727 	           retval);
728 }
729 
730 /*
731  * Returns:	0			Success
732  *		EBADF			Bad file descriptor
733  *		EALREADY		Connection already in progress
734  *		EINPROGRESS		Operation in progress
735  *		ECONNABORTED		Connection aborted
736  *		EINTR			Interrupted function
737  *		EACCES			Mandatory Access Control failure
738  *	file_socket:ENOTSOCK
739  *	file_socket:EBADF
740  *	getsockaddr:ENAMETOOLONG	Filename too long
741  *	getsockaddr:EINVAL		Invalid argument
742  *	getsockaddr:ENOMEM		Not enough space
743  *	getsockaddr:EFAULT		Bad address
744  *	soconnectlock:EOPNOTSUPP
745  *	soconnectlock:EISCONN
746  *	soconnectlock:???		[depends on protocol, filters]
747  *	msleep:EINTR
748  *
749  * Imputed:	so_error		error may be set from so_error, which
750  *					may have been set by soconnectlock.
751  */
752 /* ARGSUSED */
753 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)754 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
755 {
756 	__pthread_testcancel(1);
757 	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
758 	           retval);
759 }
760 
761 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)762 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
763 {
764 #pragma unused(p, retval)
765 	socket_ref_t so;
766 	struct sockaddr_storage ss;
767 	sockaddr_ref_t  sa = NULL;
768 	int error;
769 	int fd = uap->s;
770 	boolean_t dgram;
771 
772 	AUDIT_ARG(fd, uap->s);
773 	error = file_socket(fd, &so);
774 	if (error != 0) {
775 		return error;
776 	}
777 	if (so == NULL) {
778 		error = EBADF;
779 		goto out;
780 	}
781 
782 	/*
783 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
784 	 * if this is a datagram socket; translate for other types.
785 	 */
786 	dgram = (so->so_type == SOCK_DGRAM);
787 
788 	/* Get socket address now before we obtain socket lock */
789 	if (uap->namelen > sizeof(ss)) {
790 		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
791 	} else {
792 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
793 		if (error == 0) {
794 			sa = (sockaddr_ref_t)&ss;
795 		}
796 	}
797 	if (error != 0) {
798 		goto out;
799 	}
800 
801 	error = connectit(so, sa);
802 
803 	if (sa != NULL && sa != SA(&ss)) {
804 		free_sockaddr(sa);
805 	}
806 	if (error == ERESTART) {
807 		error = EINTR;
808 	}
809 out:
810 	file_drop(fd);
811 	return error;
812 }
813 
/*
 * connectx_nocancel: worker for connectx(); no cancellation check here.
 *
 * Copies in the endpoints description (32- or 64-bit layout, chosen by
 * IS_64BIT_PROCESS(p)), fetches the optional source address and the
 * mandatory destination address, optionally builds a uio from the caller's
 * iovec array, and passes everything to connectitx().  The byte count and
 * the connection id are then copied out to uap->len and uap->connid when
 * those are non-null, whether or not connectitx() succeeded.
 *
 * Returns 0 or an errno.  ERESTART from connectitx() is reported as EINTR.
 * A failure of either copyout is reported only when no earlier error is
 * pending.
 */
814 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)815 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
816 {
	/* NOTE(review): p is in fact used below (IS_64BIT_PROCESS, connectitx); only retval is unused */
817 #pragma unused(p, retval)
818 	struct sockaddr_storage ss, sd;
819 	sockaddr_ref_t  src = NULL, dst = NULL;
820 	socket_ref_t so;
821 	int error, error1, fd = uap->socket;
822 	boolean_t dgram;
823 	sae_connid_t cid = SAE_CONNID_ANY;
824 	struct user32_sa_endpoints ep32;
825 	struct user64_sa_endpoints ep64;
826 	struct user_sa_endpoints ep;
827 	user_ssize_t bytes_written = 0;
828 	struct user_iovec *iovp;
829 	uio_t auio = NULL;
830 
831 	AUDIT_ARG(fd, uap->socket);
832 	error = file_socket(fd, &so);
833 	if (error != 0) {
834 		return error;
835 	}
836 	if (so == NULL) {
837 		error = EBADF;
838 		goto out;
839 	}
840 
841 	if (uap->endpoints == USER_ADDR_NULL) {
842 		error = EINVAL;
843 		goto out;
844 	}
845 
	/* Normalize the user structure into ep, whichever layout the process uses */
846 	if (IS_64BIT_PROCESS(p)) {
847 		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
848 		if (error != 0) {
849 			goto out;
850 		}
851 
852 		ep.sae_srcif = ep64.sae_srcif;
853 		ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
854 		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
855 		ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
856 		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
857 	} else {
858 		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
859 		if (error != 0) {
860 			goto out;
861 		}
862 
863 		ep.sae_srcif = ep32.sae_srcif;
864 		ep.sae_srcaddr = ep32.sae_srcaddr;
865 		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
866 		ep.sae_dstaddr = ep32.sae_dstaddr;
867 		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
868 	}
869 
870 	/*
871 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
872 	 * if this is a datagram socket; translate for other types.
873 	 */
	/*
	 * NOTE(review): connect_nocancel() passes !dgram as the last
	 * getsockaddr{_s} argument, whereas the calls below pass dgram, which
	 * does not match the comment above -- confirm which is intended.
	 */
874 	dgram = (so->so_type == SOCK_DGRAM);
875 
876 	/* Get socket address now before we obtain socket lock */
	/* The source address is optional */
877 	if (ep.sae_srcaddr != USER_ADDR_NULL) {
878 		if (ep.sae_srcaddrlen > sizeof(ss)) {
879 			error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
880 		} else {
881 			error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
882 			if (error == 0) {
883 				src = (sockaddr_ref_t)&ss;
884 			}
885 		}
886 
887 		if (error) {
888 			goto out;
889 		}
890 	}
891 
	/* The destination address is mandatory */
892 	if (ep.sae_dstaddr == USER_ADDR_NULL) {
893 		error = EINVAL;
894 		goto out;
895 	}
896 
897 	/* Get socket address now before we obtain socket lock */
898 	if (ep.sae_dstaddrlen > sizeof(sd)) {
899 		error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
900 	} else {
901 		error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
902 		if (error == 0) {
903 			dst = (sockaddr_ref_t)&sd;
904 		}
905 	}
906 
907 	if (error) {
908 		goto out;
909 	}
910 
911 	VERIFY(dst != NULL);
912 
	/* Optional data: build a uio when the caller supplied an iovec array */
913 	if (uap->iov != USER_ADDR_NULL) {
914 		/* Verify range before calling uio_create() */
915 		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
916 			error = EINVAL;
917 			goto out;
918 		}
919 
		/* With an iovec array present, uap->len is required as well */
920 		if (uap->len == USER_ADDR_NULL) {
921 			error = EINVAL;
922 			goto out;
923 		}
924 
925 		/* allocate a uio to hold the number of iovecs passed */
926 		auio = uio_create(uap->iovcnt, 0,
927 		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
928 		    UIO_WRITE);
929 
930 		if (auio == NULL) {
931 			error = ENOMEM;
932 			goto out;
933 		}
934 
935 		/*
936 		 * get location of iovecs within the uio.
937 		 * then copyin the iovecs from user space.
938 		 */
939 		iovp = uio_iovsaddr(auio);
940 		if (iovp == NULL) {
941 			error = ENOMEM;
942 			goto out;
943 		}
944 		error = copyin_user_iovec_array(uap->iov,
945 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
946 		    uap->iovcnt, iovp);
947 		if (error != 0) {
948 			goto out;
949 		}
950 
951 		/* finish setup of uio_t */
952 		error = uio_calculateresid(auio);
953 		if (error != 0) {
954 			goto out;
955 		}
956 	}
957 
958 	error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
959 	    &cid, auio, uap->flags, &bytes_written);
960 	if (error == ERESTART) {
961 		error = EINTR;
962 	}
963 
	/* The two copyouts below run even when connectitx() returned an error */
964 	if (uap->len != USER_ADDR_NULL) {
965 		if (IS_64BIT_PROCESS(p)) {
966 			error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
967 		} else {
968 			error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
969 		}
970 		/* give precedence to connectitx errors */
971 		if ((error1 != 0) && (error == 0)) {
972 			error = error1;
973 		}
974 	}
975 
976 	if (uap->connid != USER_ADDR_NULL) {
977 		error1 = copyout(&cid, uap->connid, sizeof(cid));
978 		/* give precedence to connectitx errors */
979 		if ((error1 != 0) && (error == 0)) {
980 			error = error1;
981 		}
982 	}
	/*
	 * Common cleanup.  src and dst are freed only when they do not point
	 * at the on-stack ss / sd buffers, i.e. when they came from
	 * getsockaddr().
	 */
983 out:
984 	file_drop(fd);
985 	if (auio != NULL) {
986 		uio_free(auio);
987 	}
988 	if (src != NULL && src != SA(&ss)) {
989 		free_sockaddr(src);
990 	}
991 	if (dst != NULL && dst != SA(&sd)) {
992 		free_sockaddr(dst);
993 	}
994 	return error;
995 }
996 
997 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)998 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
999 {
1000 	/*
1001 	 * Due to similiarity with a POSIX interface, define as
1002 	 * an unofficial cancellation point.
1003 	 */
1004 	__pthread_testcancel(1);
1005 	return connectx_nocancel(p, uap, retval);
1006 }
1007 
1008 static int
connectit(struct socket * so,sockaddr_ref_t sa)1009 connectit(struct socket *so, sockaddr_ref_t sa)
1010 {
1011 	int error;
1012 
1013 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
1014 #if CONFIG_MACF_SOCKET_SUBSET
1015 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
1016 		return error;
1017 	}
1018 #endif /* MAC_SOCKET_SUBSET */
1019 
1020 	socket_lock(so, 1);
1021 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1022 		error = EALREADY;
1023 		goto out;
1024 	}
1025 	error = soconnectlock(so, sa, 0);
1026 	if (error != 0) {
1027 		goto out;
1028 	}
1029 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1030 		error = EINPROGRESS;
1031 		goto out;
1032 	}
1033 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1034 		lck_mtx_t *mutex_held;
1035 
1036 		if (so->so_proto->pr_getlock != NULL) {
1037 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1038 		} else {
1039 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1040 		}
1041 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1042 		    PSOCK | PCATCH, __func__, 0);
1043 		if (so->so_state & SS_DRAINING) {
1044 			error = ECONNABORTED;
1045 		}
1046 		if (error != 0) {
1047 			break;
1048 		}
1049 	}
1050 	if (error == 0) {
1051 		error = so->so_error;
1052 		so->so_error = 0;
1053 	}
1054 out:
1055 	socket_unlock(so, 1);
1056 	return error;
1057 }
1058 
1059 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1060 connectitx(struct socket *so, sockaddr_ref_t src,
1061     sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1062     sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1063     user_ssize_t *bytes_written)
1064 {
1065 	int error;
1066 
1067 	VERIFY(dst != NULL);
1068 
1069 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1070 #if CONFIG_MACF_SOCKET_SUBSET
1071 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1072 		return error;
1073 	}
1074 
1075 	if (auio != NULL) {
1076 		if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1077 			return error;
1078 		}
1079 	}
1080 #endif /* MAC_SOCKET_SUBSET */
1081 
1082 	socket_lock(so, 1);
1083 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1084 		error = EALREADY;
1085 		goto out;
1086 	}
1087 
1088 	error = soconnectxlocked(so, src, dst, p, ifscope,
1089 	    aid, pcid, flags, NULL, 0, auio, bytes_written);
1090 	if (error != 0) {
1091 		goto out;
1092 	}
1093 	/*
1094 	 * If, after the call to soconnectxlocked the flag is still set (in case
1095 	 * data has been queued and the connect() has actually been triggered,
1096 	 * it will have been unset by the transport), we exit immediately. There
1097 	 * is no reason to wait on any event.
1098 	 */
1099 	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1100 		error = 0;
1101 		goto out;
1102 	}
1103 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1104 		error = EINPROGRESS;
1105 		goto out;
1106 	}
1107 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1108 		lck_mtx_t *mutex_held;
1109 
1110 		if (so->so_proto->pr_getlock != NULL) {
1111 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1112 		} else {
1113 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1114 		}
1115 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1116 		    PSOCK | PCATCH, __func__, 0);
1117 		if (so->so_state & SS_DRAINING) {
1118 			error = ECONNABORTED;
1119 		}
1120 		if (error != 0) {
1121 			break;
1122 		}
1123 	}
1124 	if (error == 0) {
1125 		error = so->so_error;
1126 		so->so_error = 0;
1127 	}
1128 out:
1129 	socket_unlock(so, 1);
1130 	return error;
1131 }
1132 
1133 int
peeloff(proc_ref_t p,struct peeloff_args * uap,int * retval)1134 peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
1135 {
1136 #pragma unused(p, uap, retval)
1137 	/*
1138 	 * Due to similiarity with a POSIX interface, define as
1139 	 * an unofficial cancellation point.
1140 	 */
1141 	__pthread_testcancel(1);
1142 	return 0;
1143 }
1144 
1145 int
disconnectx(proc_ref_t p,struct disconnectx_args * uap,int * retval)1146 disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1147 {
1148 	/*
1149 	 * Due to similiarity with a POSIX interface, define as
1150 	 * an unofficial cancellation point.
1151 	 */
1152 	__pthread_testcancel(1);
1153 	return disconnectx_nocancel(p, uap, retval);
1154 }
1155 
1156 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1157 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1158 {
1159 #pragma unused(p, retval)
1160 	socket_ref_t so;
1161 	int fd = uap->s;
1162 	int error;
1163 
1164 	error = file_socket(fd, &so);
1165 	if (error != 0) {
1166 		return error;
1167 	}
1168 	if (so == NULL) {
1169 		error = EBADF;
1170 		goto out;
1171 	}
1172 
1173 	error = sodisconnectx(so, uap->aid, uap->cid);
1174 out:
1175 	file_drop(fd);
1176 	return error;
1177 }
1178 
1179 /*
1180  * Returns:	0			Success
1181  *	socreate:EAFNOSUPPORT
1182  *	socreate:EPROTOTYPE
1183  *	socreate:EPROTONOSUPPORT
1184  *	socreate:ENOBUFS
1185  *	socreate:ENOMEM
1186  *	socreate:EISCONN
1187  *	socreate:???			[other protocol families, IPSEC]
1188  *	falloc:ENFILE
1189  *	falloc:EMFILE
1190  *	falloc:ENOMEM
1191  *	copyout:EFAULT
1192  *	soconnect2:EINVAL
1193  *	soconnect2:EPROTOTYPE
1194  *	soconnect2:???			[other protocol families[
1195  */
1196 int
socketpair(proc_ref_t p,struct socketpair_args * uap,__unused int32_ref_t retval)1197 socketpair(proc_ref_t p, struct socketpair_args *uap,
1198     __unused int32_ref_t retval)
1199 {
1200 	fileproc_ref_t  fp1, fp2;
1201 	socket_ref_t so1, so2;
1202 	int fd, error, sv[2];
1203 
1204 	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1205 	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1206 	if (error) {
1207 		return error;
1208 	}
1209 	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1210 	if (error) {
1211 		goto free1;
1212 	}
1213 
1214 	error = falloc(p, &fp1, &fd, vfs_context_current());
1215 	if (error) {
1216 		goto free2;
1217 	}
1218 	fp1->f_flag = FREAD | FWRITE;
1219 	fp1->f_ops = &socketops;
1220 	fp_set_data(fp1, so1);
1221 	sv[0] = fd;
1222 
1223 	error = falloc(p, &fp2, &fd, vfs_context_current());
1224 	if (error) {
1225 		goto free3;
1226 	}
1227 	fp2->f_flag = FREAD | FWRITE;
1228 	fp2->f_ops = &socketops;
1229 	fp_set_data(fp2, so2);
1230 	sv[1] = fd;
1231 
1232 	error = soconnect2(so1, so2);
1233 	if (error) {
1234 		goto free4;
1235 	}
1236 	if (uap->type == SOCK_DGRAM) {
1237 		/*
1238 		 * Datagram socket connection is asymmetric.
1239 		 */
1240 		error = soconnect2(so2, so1);
1241 		if (error) {
1242 			goto free4;
1243 		}
1244 	}
1245 
1246 	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1247 		goto free4;
1248 	}
1249 
1250 	proc_fdlock(p);
1251 	procfdtbl_releasefd(p, sv[0], NULL);
1252 	procfdtbl_releasefd(p, sv[1], NULL);
1253 	fp_drop(p, sv[0], fp1, 1);
1254 	fp_drop(p, sv[1], fp2, 1);
1255 	proc_fdunlock(p);
1256 
1257 	return 0;
1258 free4:
1259 	fp_free(p, sv[1], fp2);
1260 free3:
1261 	fp_free(p, sv[0], fp1);
1262 free2:
1263 	(void) soclose(so2);
1264 free1:
1265 	(void) soclose(so1);
1266 	return error;
1267 }
1268 
1269 /*
1270  * Returns:	0			Success
1271  *		EINVAL
1272  *		ENOBUFS
1273  *		EBADF
1274  *		EPIPE
1275  *		EACCES			Mandatory Access Control failure
1276  *	file_socket:ENOTSOCK
1277  *	file_socket:EBADF
1278  *	getsockaddr:ENAMETOOLONG	Filename too long
1279  *	getsockaddr:EINVAL		Invalid argument
1280  *	getsockaddr:ENOMEM		Not enough space
1281  *	getsockaddr:EFAULT		Bad address
1282  *	<pru_sosend>:EACCES[TCP]
1283  *	<pru_sosend>:EADDRINUSE[TCP]
1284  *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1285  *	<pru_sosend>:EAFNOSUPPORT[TCP]
1286  *	<pru_sosend>:EAGAIN[TCP]
1287  *	<pru_sosend>:EBADF
1288  *	<pru_sosend>:ECONNRESET[TCP]
1289  *	<pru_sosend>:EFAULT
1290  *	<pru_sosend>:EHOSTUNREACH[TCP]
1291  *	<pru_sosend>:EINTR
1292  *	<pru_sosend>:EINVAL
1293  *	<pru_sosend>:EISCONN[AF_INET]
1294  *	<pru_sosend>:EMSGSIZE[TCP]
1295  *	<pru_sosend>:ENETDOWN[TCP]
1296  *	<pru_sosend>:ENETUNREACH[TCP]
1297  *	<pru_sosend>:ENOBUFS
1298  *	<pru_sosend>:ENOMEM[TCP]
1299  *	<pru_sosend>:ENOTCONN[AF_INET]
1300  *	<pru_sosend>:EOPNOTSUPP
1301  *	<pru_sosend>:EPERM[TCP]
1302  *	<pru_sosend>:EPIPE
1303  *	<pru_sosend>:EWOULDBLOCK
1304  *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1305  *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1306  *	<pru_sosend>:???		[value from so_error]
1307  *	sockargs:???
1308  */
1309 static int
sendit(proc_ref_t p,struct socket * so,user_msghdr_ref_t mp,uio_t uiop,int flags,int32_ref_t retval)1310 sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
1311     int flags, int32_ref_t retval)
1312 {
1313 	mbuf_ref_t  control = NULL;
1314 	struct sockaddr_storage ss;
1315 	sockaddr_ref_t  to = NULL;
1316 	boolean_t want_free = TRUE;
1317 	int error;
1318 	user_ssize_t len;
1319 
1320 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1321 
1322 	if (mp->msg_name != USER_ADDR_NULL) {
1323 		if (mp->msg_namelen > sizeof(ss)) {
1324 			error = getsockaddr(so, &to, mp->msg_name,
1325 			    mp->msg_namelen, TRUE);
1326 		} else {
1327 			error = getsockaddr_s(so, &ss, mp->msg_name,
1328 			    mp->msg_namelen, TRUE);
1329 			if (error == 0) {
1330 				to = (sockaddr_ref_t)&ss;
1331 				want_free = FALSE;
1332 			}
1333 		}
1334 		if (error != 0) {
1335 			goto out;
1336 		}
1337 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1338 	}
1339 	if (mp->msg_control != USER_ADDR_NULL) {
1340 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1341 			error = EINVAL;
1342 			goto bad;
1343 		}
1344 		error = sockargs(&control, mp->msg_control,
1345 		    mp->msg_controllen, MT_CONTROL);
1346 		if (error != 0) {
1347 			goto bad;
1348 		}
1349 	}
1350 
1351 #if CONFIG_MACF_SOCKET_SUBSET
1352 	/*
1353 	 * We check the state without holding the socket lock;
1354 	 * if a race condition occurs, it would simply result
1355 	 * in an extra call to the MAC check function.
1356 	 */
1357 	if (to != NULL &&
1358 	    !(so->so_state & SS_DEFUNCT) &&
1359 	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1360 		if (control != NULL) {
1361 			m_freem(control);
1362 		}
1363 
1364 		goto bad;
1365 	}
1366 #endif /* MAC_SOCKET_SUBSET */
1367 
1368 	len = uio_resid(uiop);
1369 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1370 	    control, flags);
1371 	if (error != 0) {
1372 		if (uio_resid(uiop) != len && (error == ERESTART ||
1373 		    error == EINTR || error == EWOULDBLOCK)) {
1374 			error = 0;
1375 		}
1376 		/* Generation of SIGPIPE can be controlled per socket */
1377 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1378 		    !(flags & MSG_NOSIGNAL)) {
1379 			psignal(p, SIGPIPE);
1380 		}
1381 	}
1382 	if (error == 0) {
1383 		*retval = (int)(len - uio_resid(uiop));
1384 	}
1385 bad:
1386 	if (want_free) {
1387 		free_sockaddr(to);
1388 	}
1389 out:
1390 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1391 
1392 	return error;
1393 }
1394 
1395 /*
1396  * Returns:	0			Success
1397  *		ENOMEM
1398  *	sendit:???			[see sendit definition in this file]
1399  *	write:???			[4056224: applicable for pipes]
1400  */
1401 int
sendto(proc_ref_t p,struct sendto_args * uap,int32_ref_t retval)1402 sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1403 {
1404 	__pthread_testcancel(1);
1405 	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1406 }
1407 
1408 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1409 sendto_nocancel(proc_ref_t p,
1410     struct sendto_nocancel_args *uap,
1411     int32_ref_t retval)
1412 {
1413 	struct user_msghdr msg;
1414 	int error;
1415 	uio_t auio = NULL;
1416 	socket_ref_t so;
1417 
1418 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1419 	AUDIT_ARG(fd, uap->s);
1420 
1421 	if (uap->flags & MSG_SKIPCFIL) {
1422 		error = EPERM;
1423 		goto done;
1424 	}
1425 
1426 	if (uap->len > LONG_MAX) {
1427 		error = EINVAL;
1428 		goto done;
1429 	}
1430 
1431 	auio = uio_create(1, 0,
1432 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1433 	    UIO_WRITE);
1434 	if (auio == NULL) {
1435 		error = ENOMEM;
1436 		goto done;
1437 	}
1438 	uio_addiov(auio, uap->buf, uap->len);
1439 
1440 	msg.msg_name = uap->to;
1441 	msg.msg_namelen = uap->tolen;
1442 	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1443 	msg.msg_iov = 0;
1444 	msg.msg_iovlen = 0;
1445 	msg.msg_control = 0;
1446 	msg.msg_flags = 0;
1447 
1448 	error = file_socket(uap->s, &so);
1449 	if (error) {
1450 		goto done;
1451 	}
1452 
1453 	if (so == NULL) {
1454 		error = EBADF;
1455 	} else {
1456 		error = sendit(p, so, &msg, auio, uap->flags, retval);
1457 	}
1458 
1459 	file_drop(uap->s);
1460 done:
1461 	if (auio != NULL) {
1462 		uio_free(auio);
1463 	}
1464 
1465 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1466 
1467 	return error;
1468 }
1469 
1470 /*
1471  * Returns:	0			Success
1472  *		ENOBUFS
1473  *	copyin:EFAULT
1474  *	sendit:???			[see sendit definition in this file]
1475  */
1476 int
sendmsg(proc_ref_t p,struct sendmsg_args * uap,int32_ref_t retval)1477 sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1478 {
1479 	__pthread_testcancel(1);
1480 	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1481 	           retval);
1482 }
1483 
1484 int
sendmsg_nocancel(proc_ref_t p,struct sendmsg_nocancel_args * uap,int32_ref_t retval)1485 sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1486     int32_ref_t retval)
1487 {
1488 	struct user32_msghdr msg32;
1489 	struct user64_msghdr msg64;
1490 	struct user_msghdr user_msg;
1491 	caddr_t msghdrp;
1492 	int     size_of_msghdr;
1493 	int error;
1494 	uio_t auio = NULL;
1495 	struct user_iovec *iovp;
1496 	socket_ref_t so;
1497 
1498 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1499 
1500 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1501 	AUDIT_ARG(fd, uap->s);
1502 
1503 	if (uap->flags & MSG_SKIPCFIL) {
1504 		error = EPERM;
1505 		goto done;
1506 	}
1507 
1508 	if (is_p_64bit_process) {
1509 		msghdrp = (caddr_t)&msg64;
1510 		size_of_msghdr = sizeof(msg64);
1511 	} else {
1512 		msghdrp = (caddr_t)&msg32;
1513 		size_of_msghdr = sizeof(msg32);
1514 	}
1515 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1516 	if (error) {
1517 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1518 		return error;
1519 	}
1520 
1521 	if (is_p_64bit_process) {
1522 		user_msg.msg_flags = msg64.msg_flags;
1523 		user_msg.msg_controllen = msg64.msg_controllen;
1524 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
1525 		user_msg.msg_iovlen = msg64.msg_iovlen;
1526 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1527 		user_msg.msg_namelen = msg64.msg_namelen;
1528 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
1529 	} else {
1530 		user_msg.msg_flags = msg32.msg_flags;
1531 		user_msg.msg_controllen = msg32.msg_controllen;
1532 		user_msg.msg_control = msg32.msg_control;
1533 		user_msg.msg_iovlen = msg32.msg_iovlen;
1534 		user_msg.msg_iov = msg32.msg_iov;
1535 		user_msg.msg_namelen = msg32.msg_namelen;
1536 		user_msg.msg_name = msg32.msg_name;
1537 	}
1538 
1539 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1540 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1541 		    0, 0, 0, 0);
1542 		return EMSGSIZE;
1543 	}
1544 
1545 	/* allocate a uio large enough to hold the number of iovecs passed */
1546 	auio = uio_create(user_msg.msg_iovlen, 0,
1547 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1548 	    UIO_WRITE);
1549 	if (auio == NULL) {
1550 		error = ENOBUFS;
1551 		goto done;
1552 	}
1553 
1554 	if (user_msg.msg_iovlen) {
1555 		/*
1556 		 * get location of iovecs within the uio.
1557 		 * then copyin the iovecs from user space.
1558 		 */
1559 		iovp = uio_iovsaddr(auio);
1560 		if (iovp == NULL) {
1561 			error = ENOBUFS;
1562 			goto done;
1563 		}
1564 		error = copyin_user_iovec_array(user_msg.msg_iov,
1565 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1566 		    user_msg.msg_iovlen, iovp);
1567 		if (error) {
1568 			goto done;
1569 		}
1570 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1571 
1572 		/* finish setup of uio_t */
1573 		error = uio_calculateresid(auio);
1574 		if (error) {
1575 			goto done;
1576 		}
1577 	} else {
1578 		user_msg.msg_iov = 0;
1579 	}
1580 
1581 	/* msg_flags is ignored for send */
1582 	user_msg.msg_flags = 0;
1583 
1584 	error = file_socket(uap->s, &so);
1585 	if (error) {
1586 		goto done;
1587 	}
1588 	if (so == NULL) {
1589 		error = EBADF;
1590 	} else {
1591 		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1592 	}
1593 	file_drop(uap->s);
1594 done:
1595 	if (auio != NULL) {
1596 		uio_free(auio);
1597 	}
1598 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1599 
1600 	return error;
1601 }
1602 
#if DEBUG || DEVELOPMENT
/*
 * Array-based implementation of sendmsg_x(): copies in the whole array of
 * uap->cnt user msghdr_x structures at once, builds one uio per message and
 * sends the messages one after another through sendit().
 *
 * On success *retval receives the number of messages sent.  A partial send
 * interrupted by ERESTART, EINTR, EWOULDBLOCK or ENOBUFS is reported as
 * success.
 */
static int
sendmsg_x_old(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
{
	const int spacetype =
	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	user_msghdr_x_ptr_t msgs = NULL;
	uio_ref_ptr_t uios = NULL;
	void_ptr_t raw_msgs = NULL;
	sockaddr_ref_t to = NULL;
	socket_ref_t so;
	user_ssize_t len_before = 0, len_after;
	size_t size_of_msghdr;
	u_int sent_cnt = 0;
	u_int i;
	int has_addr_or_ctl = 0;
	int have_fileref = 0;
	int error = 0;

	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (IS_64BIT_PROCESS(p)) {
		size_of_msghdr = sizeof(struct user64_msghdr_x);
	} else {
		size_of_msghdr = sizeof(struct user32_msghdr_x);
	}

	if (uap->flags & MSG_SKIPCFIL) {
		error = EPERM;
		goto out;
	}

	error = file_socket(uap->s, &so);
	if (error != 0) {
		goto out;
	}
	have_fileref = 1;
	if (so == NULL) {
		error = EBADF;
		goto out;
	}

	/* Range-check the message count */
	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
		error = EINVAL;
		goto out;
	}
	/* Clip to the maximum currently allowed (at least one message) */
	if (uap->cnt > somaxsendmsgx) {
		uap->cnt = somaxsendmsgx > 0 ? somaxsendmsgx : 1;
	}

	msgs = kalloc_type(struct user_msghdr_x, uap->cnt,
	    Z_WAITOK | Z_ZERO);
	if (msgs == NULL) {
		DBG_PRINTF("%s user_msg_x alloc failed", __func__);
		error = ENOMEM;
		goto out;
	}
	uios = kalloc_type(uio_ref_t, uap->cnt, Z_WAITOK | Z_ZERO);
	if (uios == NULL) {
		DBG_PRINTF("%s uiop alloc failed", __func__);
		error = ENOMEM;
		goto out;
	}

	raw_msgs = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
	if (raw_msgs == NULL) {
		DBG_PRINTF("%s user_msg_x alloc failed", __func__);
		error = ENOMEM;
		goto out;
	}
	error = copyin(uap->msgp, raw_msgs, uap->cnt * size_of_msghdr);
	if (error != 0) {
		DBG_PRINTF("%s copyin() failed", __func__);
		goto out;
	}
	error = internalize_user_msghdr_array(raw_msgs, spacetype,
	    UIO_WRITE, uap->cnt, msgs, uios);
	if (error != 0) {
		DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
		goto out;
	}

	/*
	 * The size of every message's iovec, and the aggregate size of
	 * all of them, must be valid.
	 */
	if (uio_array_is_valid(uios, uap->cnt) == false) {
		error = EINVAL;
		goto out;
	}

	/* Sanity-check each message header */
	for (i = 0; i < uap->cnt; i++) {
		struct user_msghdr_x *mp = msgs + i;

		/* Flags are not accepted on a message to be sent */
		if (mp->msg_flags != 0) {
			error = EINVAL;
			goto out;
		}

		/* Note whether any message carries an address or control data */
		if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0 ||
		    mp->msg_control != USER_ADDR_NULL ||
		    mp->msg_controllen != 0) {
			has_addr_or_ctl = 1;
		}

#if CONFIG_MACF_SOCKET_SUBSET
		/*
		 * The socket state is examined without holding the socket
		 * lock; should a race occur, the only consequence is an
		 * extra call to the MAC check function.
		 *
		 * Note: `to' is never set in this function, so this check
		 * never fires; it is placeholder code.  If address support
		 * is added here, every address will have to be checked.
		 */
		if (to != NULL && !(so->so_state & SS_DEFUNCT)) {
			error = mac_socket_check_send(kauth_cred_get(), so, to);
			if (error != 0) {
				goto out;
			}
		}
#endif /* CONFIG_MACF_SOCKET_SUBSET */
	}

	len_before = uio_array_resid(uios, uap->cnt);

	/* Send the messages one at a time, stopping at the first error */
	for (i = 0; i < uap->cnt; i++) {
		struct user_msghdr_x *mp = msgs + i;
		struct user_msghdr one_msg;
		int32_t bytes_sent;

		one_msg.msg_name = mp->msg_name;
		one_msg.msg_namelen = mp->msg_namelen;
		one_msg.msg_iov = mp->msg_iov;
		one_msg.msg_iovlen = mp->msg_iovlen;
		one_msg.msg_control = mp->msg_control;
		one_msg.msg_controllen = mp->msg_controllen;
		one_msg.msg_flags = mp->msg_flags;

		error = sendit(p, so, &one_msg, uios[i], uap->flags,
		    &bytes_sent);
		if (error != 0) {
			break;
		}
		sent_cnt++;
	}

	len_after = uio_array_resid(uios, uap->cnt);

	VERIFY(len_after <= len_before);

	if (error != 0) {
		/* Some data went out before a transient error: not a failure */
		if (len_after != len_before) {
			if (error == ERESTART || error == EINTR ||
			    error == EWOULDBLOCK || error == ENOBUFS) {
				error = 0;
			}
		}
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
		    !(uap->flags & MSG_NOSIGNAL)) {
			psignal(p, SIGPIPE);
		}
	}
	if (error == 0) {
		externalize_user_msghdr_array(raw_msgs, spacetype,
		    UIO_WRITE, sent_cnt, msgs, uios);

		*retval = (int)(sent_cnt);
	}
out:
	if (have_fileref) {
		file_drop(uap->s);
	}
	kfree_data(raw_msgs, uap->cnt * size_of_msghdr);
	if (uios != NULL) {
		free_uio_array(uios, uap->cnt);
		kfree_type(uio_ref_t, uap->cnt, uios);
	}
	kfree_type(struct user_msghdr_x, uap->cnt, msgs);

	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);

	return error;
}
#endif /* DEBUG || DEVELOPMENT */
1803 
1804 static int
internalize_user_msg_x(struct user_msghdr * user_msg,uio_t * auiop,proc_ref_t p,void_ptr_t user_msghdr_x_src)1805 internalize_user_msg_x(struct user_msghdr *user_msg, uio_t *auiop, proc_ref_t p, void_ptr_t user_msghdr_x_src)
1806 {
1807 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1808 	uio_t auio = *auiop;
1809 	int error;
1810 
1811 	if (is_p_64bit_process) {
1812 		struct user64_msghdr_x msghdrx64;
1813 
1814 		error = copyin((user_addr_t)user_msghdr_x_src,
1815 		    &msghdrx64, sizeof(msghdrx64));
1816 		if (error != 0) {
1817 			DBG_PRINTF("%s copyin() msghdrx64 failed %d",
1818 			    __func__, error);
1819 			goto done;
1820 		}
1821 		user_msg->msg_name = msghdrx64.msg_name;
1822 		user_msg->msg_namelen = msghdrx64.msg_namelen;
1823 		user_msg->msg_iov = msghdrx64.msg_iov;
1824 		user_msg->msg_iovlen = msghdrx64.msg_iovlen;
1825 		user_msg->msg_control = msghdrx64.msg_control;
1826 		user_msg->msg_controllen = msghdrx64.msg_controllen;
1827 	} else {
1828 		struct user32_msghdr_x msghdrx32;
1829 
1830 		error = copyin((user_addr_t)user_msghdr_x_src,
1831 		    &msghdrx32, sizeof(msghdrx32));
1832 		if (error != 0) {
1833 			DBG_PRINTF("%s copyin() msghdrx32 failed %d",
1834 			    __func__, error);
1835 			goto done;
1836 		}
1837 		user_msg->msg_name = msghdrx32.msg_name;
1838 		user_msg->msg_namelen = msghdrx32.msg_namelen;
1839 		user_msg->msg_iov = msghdrx32.msg_iov;
1840 		user_msg->msg_iovlen = msghdrx32.msg_iovlen;
1841 		user_msg->msg_control = msghdrx32.msg_control;
1842 		user_msg->msg_controllen = msghdrx32.msg_controllen;
1843 	}
1844 	/* msg_flags is ignored for send */
1845 	user_msg->msg_flags = 0;
1846 
1847 	if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
1848 		error = EMSGSIZE;
1849 		DBG_PRINTF("%s bad msg_iovlen, error %d",
1850 		    __func__, error);
1851 		goto done;
1852 	}
1853 	/*
1854 	 * Attempt to reuse the uio if large enough, otherwise we need
1855 	 * a new one
1856 	 */
1857 	if (auio != NULL) {
1858 		if (auio->uio_max_iovs >= user_msg->msg_iovlen) {
1859 			uio_reset(auio, 0,
1860 			    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1861 			    UIO_WRITE);
1862 		} else {
1863 			uio_free(auio);
1864 			auio = NULL;
1865 		}
1866 	}
1867 	if (auio == NULL) {
1868 		auio = uio_create(user_msg->msg_iovlen, 0,
1869 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1870 		    UIO_WRITE);
1871 		if (auio == NULL) {
1872 			error = ENOBUFS;
1873 			DBG_PRINTF("%s uio_create() failed %d",
1874 			    __func__, error);
1875 			goto done;
1876 		}
1877 	}
1878 
1879 	if (user_msg->msg_iovlen) {
1880 		/*
1881 		 * get location of iovecs within the uio.
1882 		 * then copyin the iovecs from user space.
1883 		 */
1884 		struct user_iovec *iovp = uio_iovsaddr(auio);
1885 		if (iovp == NULL) {
1886 			error = ENOBUFS;
1887 			goto done;
1888 		}
1889 		error = copyin_user_iovec_array(user_msg->msg_iov,
1890 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1891 		    user_msg->msg_iovlen, iovp);
1892 		if (error != 0) {
1893 			goto done;
1894 		}
1895 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
1896 
1897 		/* finish setup of uio_t */
1898 		error = uio_calculateresid(auio);
1899 		if (error) {
1900 			goto done;
1901 		}
1902 	} else {
1903 		user_msg->msg_iov = 0;
1904 	}
1905 
1906 done:
1907 	*auiop = auio;
1908 	return error;
1909 }
1910 
1911 static int
mbuf_packet_from_uio(socket_ref_t so,mbuf_ref_ref_t mp,uio_t auio)1912 mbuf_packet_from_uio(socket_ref_t so, mbuf_ref_ref_t mp, uio_t auio)
1913 {
1914 	int error = 0;
1915 	uint16_t headroom = 0;
1916 	size_t bytes_to_alloc;
1917 	mbuf_ref_t top = NULL, m;
1918 
1919 	if (soreserveheadroom != 0) {
1920 		headroom = so->so_pktheadroom;
1921 	}
1922 	bytes_to_alloc = headroom + uio_resid(auio);
1923 
1924 	error = mbuf_allocpacket(MBUF_WAITOK, bytes_to_alloc, NULL, &top);
1925 	if (error != 0) {
1926 		os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: mbuf_allocpacket %zu error %d",
1927 		    bytes_to_alloc, error);
1928 		goto done;
1929 	}
1930 
1931 	if (headroom > 0 && headroom < mbuf_maxlen(top)) {
1932 		top->m_data += headroom;
1933 	}
1934 
1935 	for (m = top; m != NULL; m = m->m_next) {
1936 		int bytes_to_copy = (int)uio_resid(auio);
1937 		ssize_t mlen;
1938 
1939 		if ((m->m_flags & M_EXT)) {
1940 			mlen = m->m_ext.ext_size -
1941 			    M_LEADINGSPACE(m);
1942 		} else if ((m->m_flags & M_PKTHDR)) {
1943 			mlen = MHLEN - M_LEADINGSPACE(m);
1944 			m_add_crumb(m, PKT_CRUMB_SOSEND);
1945 		} else {
1946 			mlen = MLEN - M_LEADINGSPACE(m);
1947 		}
1948 		int len = imin((int)mlen, bytes_to_copy);
1949 
1950 		error = uiomove(mtod(m, caddr_t), (int)len, auio);
1951 		if (error != 0) {
1952 			os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
1953 			    len, error);
1954 			goto done;
1955 		}
1956 		m->m_len = len;
1957 		top->m_pkthdr.len += len;
1958 	}
1959 
1960 done:
1961 	if (error != 0) {
1962 		m_freem(top);
1963 	} else {
1964 		*mp = top;
1965 	}
1966 	return error;
1967 }
1968 
1969 static int
sendit_x(proc_ref_t p,socket_ref_t so,struct sendmsg_x_args * uap,u_int * retval)1970 sendit_x(proc_ref_t p, socket_ref_t so, struct sendmsg_x_args *uap, u_int *retval)
1971 {
1972 	int error = 0;
1973 	uio_t auio = NULL;
1974 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1975 	void_ptr_t src;
1976 	MBUFQ_HEAD() pktlist = {};
1977 	size_t total_pkt_len = 0;
1978 	u_int pkt_cnt = 0;
1979 	int flags = uap->flags;
1980 	mbuf_ref_t top;
1981 
1982 	MBUFQ_INIT(&pktlist);
1983 
1984 	*retval = 0;
1985 
1986 	/* We re-use the uio when possible */
1987 	auio = uio_create(1, 0,
1988 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1989 	    UIO_WRITE);
1990 	if (auio == NULL) {
1991 		error = ENOBUFS;
1992 		DBG_PRINTF("%s uio_create() failed %d",
1993 		    __func__, error);
1994 		goto done;
1995 	}
1996 
1997 	src = (void_ptr_t)uap->msgp;
1998 
1999 	/*
2000 	 * Create a list of packets
2001 	 */
2002 	for (u_int i = 0; i < uap->cnt; i++) {
2003 		struct user_msghdr user_msg = {};
2004 		mbuf_ref_t m = NULL;
2005 
2006 		if (is_p_64bit_process) {
2007 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
2008 			if (error != 0) {
2009 				os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
2010 				goto done;
2011 			}
2012 		} else {
2013 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
2014 			if (error != 0) {
2015 				os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
2016 				goto done;
2017 			}
2018 		}
2019 		/*
2020 		 * Stop on the first datagram that is too large
2021 		 */
2022 		if (uio_resid(auio) > so->so_snd.sb_hiwat) {
2023 			if (i == 0) {
2024 				error = EMSGSIZE;
2025 				goto done;
2026 			}
2027 			break;
2028 		}
2029 		/*
2030 		 * An mbuf packet has the control mbuf(s) followed by data
2031 		 * We allocate the mbufs in reverse order
2032 		 */
2033 		error = mbuf_packet_from_uio(so, &m, auio);
2034 		if (error != 0) {
2035 			os_log(OS_LOG_DEFAULT, "sendit_x: mbuf_packet_from_uio error %d\n", error);
2036 			goto done;
2037 		}
2038 		total_pkt_len += m->m_pkthdr.len;
2039 
2040 		if (user_msg.msg_control != USER_ADDR_NULL && user_msg.msg_controllen != 0) {
2041 			mbuf_ref_t control = NULL;
2042 
2043 			error = sockargs(&control, user_msg.msg_control, user_msg.msg_controllen, MT_CONTROL);
2044 			if (error != 0) {
2045 				os_log(OS_LOG_DEFAULT, "sendit_x: sockargs error %d\n", error);
2046 				goto done;
2047 			}
2048 			control->m_next = m;
2049 			m = control;
2050 		}
2051 		MBUFQ_ENQUEUE(&pktlist, m);
2052 
2053 		pkt_cnt += 1;
2054 	}
2055 
2056 	top = MBUFQ_FIRST(&pktlist);
2057 	MBUFQ_INIT(&pktlist);
2058 	error = sosend_list(so, top, total_pkt_len, &pkt_cnt, flags);
2059 	if (error != 0) {
2060 		os_log(OS_LOG_DEFAULT, "sendit_x: sosend_list error %d\n", error);
2061 		goto done;
2062 	}
2063 done:
2064 	*retval = pkt_cnt;
2065 
2066 	if (auio != NULL) {
2067 		uio_free(auio);
2068 	}
2069 	MBUFQ_DRAIN(&pktlist);
2070 	return error;
2071 }
2072 
2073 int
sendmsg_x(proc_ref_t p,struct sendmsg_x_args * uap,user_ssize_t * retval)2074 sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
2075 {
2076 	void_ptr_t src;
2077 	int error;
2078 	uio_t auio = NULL;
2079 	socket_ref_t so;
2080 	u_int uiocnt = 0;
2081 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2082 
2083 #if DEBUG || DEVELOPMENT
2084 	if (sendmsg_x_mode == 2) {
2085 		return sendmsg_x_old(p, uap, retval);
2086 	}
2087 #endif /* DEBUG || DEVELOPMENT */
2088 
2089 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2090 	AUDIT_ARG(fd, uap->s);
2091 
2092 	if (uap->flags & MSG_SKIPCFIL) {
2093 		error = EPERM;
2094 		goto done_no_filedrop;
2095 	}
2096 
2097 	error = file_socket(uap->s, &so);
2098 	if (error) {
2099 		goto done_no_filedrop;
2100 	}
2101 	if (so == NULL) {
2102 		error = EBADF;
2103 		goto done;
2104 	}
2105 
2106 	/*
2107 	 * For an atomic datagram connected socket we can build the list of
2108 	 * mbuf packets with sosend_list()
2109 	 */
2110 	if (so->so_type == SOCK_DGRAM && sosendallatonce(so) &&
2111 	    (so->so_state & SS_ISCONNECTED) && sendmsg_x_mode != 1) {
2112 		error = sendit_x(p, so, uap, &uiocnt);
2113 		if (error != 0) {
2114 			DBG_PRINTF("%s sendit_x() failed %d",
2115 			    __func__, error);
2116 		}
2117 		goto done;
2118 	}
2119 
2120 	src = (void_ptr_t)uap->msgp;
2121 
2122 	/* We re-use the uio when possible */
2123 	auio = uio_create(1, 0,
2124 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2125 	    UIO_WRITE);
2126 	if (auio == NULL) {
2127 		error = ENOBUFS;
2128 		DBG_PRINTF("%s uio_create() failed %d",
2129 		    __func__, error);
2130 		goto done;
2131 	}
2132 
2133 	for (u_int i = 0; i < uap->cnt; i++) {
2134 		struct user_msghdr user_msg = {};
2135 
2136 		if (is_p_64bit_process) {
2137 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
2138 			if (error != 0) {
2139 				goto done;
2140 			}
2141 		} else {
2142 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
2143 			if (error != 0) {
2144 				goto done;
2145 			}
2146 		}
2147 
2148 		int32_t len = 0;
2149 		error = sendit(p, so, &user_msg, auio, uap->flags, &len);
2150 		if (error != 0) {
2151 			break;
2152 		}
2153 		uiocnt += 1;
2154 	}
2155 done:
2156 	if (error != 0) {
2157 		if (uiocnt != 0 && (error == ERESTART ||
2158 		    error == EINTR || error == EWOULDBLOCK ||
2159 		    error == ENOBUFS || error == EMSGSIZE)) {
2160 			error = 0;
2161 		}
2162 		/* Generation of SIGPIPE can be controlled per socket */
2163 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
2164 		    !(uap->flags & MSG_NOSIGNAL)) {
2165 			psignal(p, SIGPIPE);
2166 		}
2167 	}
2168 	if (error == 0) {
2169 		*retval = (int)(uiocnt);
2170 	}
2171 	file_drop(uap->s);
2172 
2173 done_no_filedrop:
2174 	if (auio != NULL) {
2175 		uio_free(auio);
2176 	}
2177 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2178 
2179 	return error;
2180 }
2181 
2182 
2183 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)2184 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
2185 {
2186 	int error = 0;
2187 	socklen_t sa_len = 0;
2188 	ssize_t len;
2189 
2190 	len = *namelen;
2191 	if (len <= 0 || fromsa == 0) {
2192 		len = 0;
2193 	} else {
2194 #ifndef MIN
2195 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2196 #endif
2197 		sa_len = fromsa->sa_len;
2198 		len = MIN((unsigned int)len, sa_len);
2199 		error = copyout(fromsa, name, (unsigned)len);
2200 		if (error) {
2201 			goto out;
2202 		}
2203 	}
2204 	*namelen = sa_len;
2205 out:
2206 	return 0;
2207 }
2208 
2209 static int
copyout_maddr(struct mbuf * m,user_addr_t name,socklen_t * namelen)2210 copyout_maddr(struct mbuf *m, user_addr_t name, socklen_t *namelen)
2211 {
2212 	int error = 0;
2213 	socklen_t sa_len = 0;
2214 	ssize_t len;
2215 
2216 	len = *namelen;
2217 	if (len <= 0 || m == NULL) {
2218 		len = 0;
2219 	} else {
2220 #ifndef MIN
2221 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2222 #endif
2223 		struct sockaddr *fromsa = mtod(m, struct sockaddr *);
2224 
2225 		sa_len = fromsa->sa_len;
2226 		len = MIN((unsigned int)len, sa_len);
2227 		error = copyout(fromsa, name, (unsigned)len);
2228 		if (error != 0) {
2229 			goto out;
2230 		}
2231 	}
2232 	*namelen = sa_len;
2233 out:
2234 	return 0;
2235 }
2236 
2237 static int
copyout_control(proc_ref_t p,mbuf_ref_t m,user_addr_t control,socklen_ref_t controllen,int_ref_t flags,socket_ref_t so)2238 copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
2239     socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
2240 {
2241 	int error = 0;
2242 	socklen_t len;
2243 	user_addr_t ctlbuf;
2244 	struct inpcb *inp = NULL;
2245 	bool want_pktinfo = false;
2246 	bool seen_pktinfo = false;
2247 
2248 	if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
2249 		inp = sotoinpcb(so);
2250 		want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
2251 	}
2252 
2253 	len = *controllen;
2254 	*controllen = 0;
2255 	ctlbuf = control;
2256 
2257 	while (m && len > 0) {
2258 		socklen_t tocopy;
2259 		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
2260 		socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
2261 		socklen_t buflen = m->m_len;
2262 
2263 		while (buflen > 0 && len > 0) {
2264 			/*
2265 			 * SCM_TIMESTAMP hack because  struct timeval has a
2266 			 * different size for 32 bits and 64 bits processes
2267 			 */
2268 			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
2269 				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
2270 				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
2271 				socklen_t tmp_space;
2272 				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
2273 
2274 				tmp_cp->cmsg_level = SOL_SOCKET;
2275 				tmp_cp->cmsg_type = SCM_TIMESTAMP;
2276 
2277 				if (proc_is64bit(p)) {
2278 					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
2279 
2280 					os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
2281 					os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
2282 
2283 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
2284 					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
2285 				} else {
2286 					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
2287 
2288 					tv32->tv_sec = (user32_time_t)tv->tv_sec;
2289 					tv32->tv_usec = tv->tv_usec;
2290 
2291 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
2292 					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
2293 				}
2294 				if (len >= tmp_space) {
2295 					tocopy = tmp_space;
2296 				} else {
2297 					*flags |= MSG_CTRUNC;
2298 					tocopy = len;
2299 				}
2300 				error = copyout(tmp_buffer, ctlbuf, tocopy);
2301 				if (error) {
2302 					goto out;
2303 				}
2304 			} else {
2305 				/* If socket has flow tracking and socket did not request address, ignore it */
2306 				if (SOFLOW_ENABLED(so) &&
2307 				    ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
2308 				    !(inp->inp_flags & INP_RECVDSTADDR)) ||
2309 				    (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
2310 				    !(inp->inp_flags & IN6P_PKTINFO)))) {
2311 					tocopy = 0;
2312 				} else {
2313 					if (cp_size > buflen) {
2314 						panic("cp_size > buflen, something wrong with alignment!");
2315 					}
2316 					if (len >= cp_size) {
2317 						tocopy = cp_size;
2318 					} else {
2319 						*flags |= MSG_CTRUNC;
2320 						tocopy = len;
2321 					}
2322 					error = copyout((caddr_t) cp, ctlbuf, tocopy);
2323 					if (error) {
2324 						goto out;
2325 					}
2326 					if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
2327 					    (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
2328 						seen_pktinfo = true;
2329 					}
2330 				}
2331 			}
2332 
2333 
2334 			ctlbuf += tocopy;
2335 			len -= tocopy;
2336 
2337 			buflen -= cp_size;
2338 			cp = (struct cmsghdr *)(void *)
2339 			    ((unsigned char *) cp + cp_size);
2340 			cp_size = CMSG_ALIGN(cp->cmsg_len);
2341 		}
2342 
2343 		m = m->m_next;
2344 	}
2345 	*controllen = (socklen_t)(ctlbuf - control);
2346 out:
2347 	if (want_pktinfo && !seen_pktinfo) {
2348 		missingpktinfo += 1;
2349 #if (DEBUG || DEVELOPMENT)
2350 		char pname[MAXCOMLEN];
2351 		char local[MAX_IPv6_STR_LEN + 6];
2352 		char remote[MAX_IPv6_STR_LEN + 6];
2353 
2354 		proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
2355 		if (inp->inp_vflag & INP_IPV6) {
2356 			inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
2357 			inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
2358 		} else {
2359 			inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
2360 			inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
2361 		}
2362 
2363 		os_log(OS_LOG_DEFAULT,
2364 		    "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
2365 		    local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
2366 		    pname, so->last_pid, error);
2367 #endif /* (DEBUG || DEVELOPMENT) */
2368 	}
2369 	return error;
2370 }
2371 
2372 /*
2373  * Returns:	0			Success
2374  *		ENOTSOCK
2375  *		EINVAL
2376  *		EBADF
2377  *		EACCES			Mandatory Access Control failure
2378  *	copyout:EFAULT
2379  *	fp_lookup:EBADF
2380  *	<pru_soreceive>:ENOBUFS
2381  *	<pru_soreceive>:ENOTCONN
2382  *	<pru_soreceive>:EWOULDBLOCK
2383  *	<pru_soreceive>:EFAULT
2384  *	<pru_soreceive>:EINTR
2385  *	<pru_soreceive>:EBADF
2386  *	<pru_soreceive>:EINVAL
2387  *	<pru_soreceive>:EMSGSIZE
2388  *	<pru_soreceive>:???
2389  *
2390  * Notes:	Additional return values from calls through <pru_soreceive>
2391  *		depend on protocols other than TCP or AF_UNIX, which are
2392  *		documented above.
2393  */
2394 static int
recvit(proc_ref_t p,int s,user_msghdr_ref_t mp,uio_t uiop,user_addr_t namelenp,int32_ref_t retval)2395 recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
2396     user_addr_t namelenp, int32_ref_t retval)
2397 {
2398 	ssize_t len;
2399 	int error;
2400 	mbuf_ref_t  control = 0;
2401 	socket_ref_t so;
2402 	sockaddr_ref_t  fromsa = 0;
2403 	fileproc_ref_t  fp;
2404 
2405 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2406 	if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
2407 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2408 		return error;
2409 	}
2410 	so = (struct socket *)fp_get_data(fp);
2411 
2412 #if CONFIG_MACF_SOCKET_SUBSET
2413 	/*
2414 	 * We check the state without holding the socket lock;
2415 	 * if a race condition occurs, it would simply result
2416 	 * in an extra call to the MAC check function.
2417 	 */
2418 	if (!(so->so_state & SS_DEFUNCT) &&
2419 	    !(so->so_state & SS_ISCONNECTED) &&
2420 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2421 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2422 		goto out1;
2423 	}
2424 #endif /* MAC_SOCKET_SUBSET */
2425 	if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
2426 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
2427 		error = EINVAL;
2428 		goto out1;
2429 	}
2430 
2431 	len = uio_resid(uiop);
2432 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2433 	    NULL, mp->msg_control ? &control : NULL,
2434 	    &mp->msg_flags);
2435 	if (fromsa) {
2436 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2437 		    fromsa);
2438 	}
2439 	if (error) {
2440 		if (uio_resid(uiop) != len && (error == ERESTART ||
2441 		    error == EINTR || error == EWOULDBLOCK)) {
2442 			error = 0;
2443 		}
2444 	}
2445 	if (error) {
2446 		goto out;
2447 	}
2448 
2449 	*retval = (int32_t)(len - uio_resid(uiop));
2450 
2451 	if (mp->msg_name) {
2452 		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
2453 		if (error) {
2454 			goto out;
2455 		}
2456 		/* return the actual, untruncated address length */
2457 		if (namelenp &&
2458 		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2459 		    sizeof(int)))) {
2460 			goto out;
2461 		}
2462 	}
2463 
2464 	if (mp->msg_control) {
2465 		error = copyout_control(p, control, mp->msg_control,
2466 		    &mp->msg_controllen, &mp->msg_flags, so);
2467 	}
2468 out:
2469 	free_sockaddr(fromsa);
2470 	if (control) {
2471 		m_freem(control);
2472 	}
2473 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2474 out1:
2475 	fp_drop(p, s, fp, 0);
2476 	return error;
2477 }
2478 
2479 /*
2480  * Returns:	0			Success
2481  *		ENOMEM
2482  *	copyin:EFAULT
2483  *	recvit:???
2484  *	read:???			[4056224: applicable for pipes]
2485  *
2486  * Notes:	The read entry point is only called as part of support for
2487  *		binary backward compatability; new code should use read
2488  *		instead of recv or recvfrom when attempting to read data
2489  *		from pipes.
2490  *
2491  *		For full documentation of the return codes from recvit, see
2492  *		the block header for the recvit function.
2493  */
2494 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2495 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2496 {
2497 	__pthread_testcancel(1);
2498 	return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2499 	           retval);
2500 }
2501 
2502 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2503 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2504     int32_ref_t retval)
2505 {
2506 	struct user_msghdr msg;
2507 	int error;
2508 	uio_t auio = NULL;
2509 
2510 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2511 	AUDIT_ARG(fd, uap->s);
2512 
2513 	if (uap->fromlenaddr) {
2514 		error = copyin(uap->fromlenaddr,
2515 		    (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2516 		if (error) {
2517 			return error;
2518 		}
2519 	} else {
2520 		msg.msg_namelen = 0;
2521 	}
2522 	msg.msg_name = uap->from;
2523 	auio = uio_create(1, 0,
2524 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2525 	    UIO_READ);
2526 	if (auio == NULL) {
2527 		return ENOMEM;
2528 	}
2529 
2530 	uio_addiov(auio, uap->buf, uap->len);
2531 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
2532 	msg.msg_iov = 0;
2533 	msg.msg_iovlen = 0;
2534 	msg.msg_control = 0;
2535 	msg.msg_controllen = 0;
2536 	msg.msg_flags = uap->flags;
2537 	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2538 	if (auio != NULL) {
2539 		uio_free(auio);
2540 	}
2541 
2542 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2543 
2544 	return error;
2545 }
2546 
2547 /*
2548  * Returns:	0			Success
2549  *		EMSGSIZE
2550  *		ENOMEM
2551  *	copyin:EFAULT
2552  *	copyout:EFAULT
2553  *	recvit:???
2554  *
2555  * Notes:	For full documentation of the return codes from recvit, see
2556  *		the block header for the recvit function.
2557  */
2558 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2559 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2560 {
2561 	__pthread_testcancel(1);
2562 	return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2563 	           retval);
2564 }
2565 
2566 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2567 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2568     int32_ref_t retval)
2569 {
2570 	struct user32_msghdr msg32;
2571 	struct user64_msghdr msg64;
2572 	struct user_msghdr user_msg;
2573 	caddr_t msghdrp;
2574 	int     size_of_msghdr;
2575 	user_addr_t uiov;
2576 	int error;
2577 	uio_t auio = NULL;
2578 	struct user_iovec *iovp;
2579 
2580 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2581 
2582 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2583 	AUDIT_ARG(fd, uap->s);
2584 	if (is_p_64bit_process) {
2585 		msghdrp = (caddr_t)&msg64;
2586 		size_of_msghdr = sizeof(msg64);
2587 	} else {
2588 		msghdrp = (caddr_t)&msg32;
2589 		size_of_msghdr = sizeof(msg32);
2590 	}
2591 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
2592 	if (error) {
2593 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2594 		return error;
2595 	}
2596 
2597 	/* only need to copy if user process is not 64-bit */
2598 	if (is_p_64bit_process) {
2599 		user_msg.msg_flags = msg64.msg_flags;
2600 		user_msg.msg_controllen = msg64.msg_controllen;
2601 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
2602 		user_msg.msg_iovlen = msg64.msg_iovlen;
2603 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2604 		user_msg.msg_namelen = msg64.msg_namelen;
2605 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
2606 	} else {
2607 		user_msg.msg_flags = msg32.msg_flags;
2608 		user_msg.msg_controllen = msg32.msg_controllen;
2609 		user_msg.msg_control = msg32.msg_control;
2610 		user_msg.msg_iovlen = msg32.msg_iovlen;
2611 		user_msg.msg_iov = msg32.msg_iov;
2612 		user_msg.msg_namelen = msg32.msg_namelen;
2613 		user_msg.msg_name = msg32.msg_name;
2614 	}
2615 
2616 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2617 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2618 		    0, 0, 0, 0);
2619 		return EMSGSIZE;
2620 	}
2621 
2622 	user_msg.msg_flags = uap->flags;
2623 
2624 	/* allocate a uio large enough to hold the number of iovecs passed */
2625 	auio = uio_create(user_msg.msg_iovlen, 0,
2626 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2627 	    UIO_READ);
2628 	if (auio == NULL) {
2629 		error = ENOMEM;
2630 		goto done;
2631 	}
2632 
2633 	/*
2634 	 * get location of iovecs within the uio.  then copyin the iovecs from
2635 	 * user space.
2636 	 */
2637 	iovp = uio_iovsaddr(auio);
2638 	if (iovp == NULL) {
2639 		error = ENOMEM;
2640 		goto done;
2641 	}
2642 	uiov = user_msg.msg_iov;
2643 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2644 	error = copyin_user_iovec_array(uiov,
2645 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2646 	    user_msg.msg_iovlen, iovp);
2647 	if (error) {
2648 		goto done;
2649 	}
2650 
2651 	/* finish setup of uio_t */
2652 	error = uio_calculateresid(auio);
2653 	if (error) {
2654 		goto done;
2655 	}
2656 
2657 	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2658 	if (!error) {
2659 		user_msg.msg_iov = uiov;
2660 		if (is_p_64bit_process) {
2661 			msg64.msg_flags = user_msg.msg_flags;
2662 			msg64.msg_controllen = user_msg.msg_controllen;
2663 			msg64.msg_control = user_msg.msg_control;
2664 			msg64.msg_iovlen = user_msg.msg_iovlen;
2665 			msg64.msg_iov = user_msg.msg_iov;
2666 			msg64.msg_namelen = user_msg.msg_namelen;
2667 			msg64.msg_name = user_msg.msg_name;
2668 		} else {
2669 			msg32.msg_flags = user_msg.msg_flags;
2670 			msg32.msg_controllen = user_msg.msg_controllen;
2671 			msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2672 			msg32.msg_iovlen = user_msg.msg_iovlen;
2673 			msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2674 			msg32.msg_namelen = user_msg.msg_namelen;
2675 			msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2676 		}
2677 		error = copyout(msghdrp, uap->msg, size_of_msghdr);
2678 	}
2679 done:
2680 	if (auio != NULL) {
2681 		uio_free(auio);
2682 	}
2683 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2684 	return error;
2685 }
2686 
2687 __attribute__((noinline))
2688 static int
recvmsg_x_array(proc_ref_t p,socket_ref_t so,struct recvmsg_x_args * uap,user_ssize_t * retval)2689 recvmsg_x_array(proc_ref_t p, socket_ref_t so, struct recvmsg_x_args *uap, user_ssize_t *retval)
2690 {
2691 	int error = EOPNOTSUPP;
2692 	user_msghdr_x_ptr_t user_msg_x = NULL;
2693 	recv_msg_elem_ptr_t recv_msg_array = NULL;
2694 	user_ssize_t len_before = 0, len_after;
2695 	size_t size_of_msghdr;
2696 	void_ptr_t umsgp = NULL;
2697 	u_int i;
2698 	u_int uiocnt;
2699 	int flags = uap->flags;
2700 
2701 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2702 
2703 	size_of_msghdr = is_p_64bit_process ?
2704 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2705 
2706 	/*
2707 	 * Support only a subset of message flags
2708 	 */
2709 	if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |  MSG_NBIO)) {
2710 		return EOPNOTSUPP;
2711 	}
2712 	/*
2713 	 * Input parameter range check
2714 	 */
2715 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2716 		error = EINVAL;
2717 		goto out;
2718 	}
2719 	if (uap->cnt > somaxrecvmsgx) {
2720 		uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2721 	}
2722 
2723 	user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
2724 	    Z_WAITOK | Z_ZERO);
2725 	if (user_msg_x == NULL) {
2726 		DBG_PRINTF("%s user_msg_x alloc failed", __func__);
2727 		error = ENOMEM;
2728 		goto out;
2729 	}
2730 	recv_msg_array = alloc_recv_msg_array(uap->cnt);
2731 	if (recv_msg_array == NULL) {
2732 		DBG_PRINTF("%s alloc_recv_msg_array() failed", __func__);
2733 		error = ENOMEM;
2734 		goto out;
2735 	}
2736 
2737 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2738 	if (umsgp == NULL) {
2739 		DBG_PRINTF("%s umsgp alloc failed", __func__);
2740 		error = ENOMEM;
2741 		goto out;
2742 	}
2743 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2744 	if (error) {
2745 		DBG_PRINTF("%s copyin() failed", __func__);
2746 		goto out;
2747 	}
2748 	error = internalize_recv_msghdr_array(umsgp,
2749 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2750 	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2751 	if (error) {
2752 		DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
2753 		goto out;
2754 	}
2755 	/*
2756 	 * Make sure the size of each message iovec and
2757 	 * the aggregate size of all the iovec is valid
2758 	 */
2759 	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2760 		error = EINVAL;
2761 		goto out;
2762 	}
2763 	/*
2764 	 * Sanity check on passed arguments
2765 	 */
2766 	for (i = 0; i < uap->cnt; i++) {
2767 		struct user_msghdr_x *mp = user_msg_x + i;
2768 
2769 		if (mp->msg_flags != 0) {
2770 			error = EINVAL;
2771 			goto out;
2772 		}
2773 	}
2774 #if CONFIG_MACF_SOCKET_SUBSET
2775 	/*
2776 	 * We check the state without holding the socket lock;
2777 	 * if a race condition occurs, it would simply result
2778 	 * in an extra call to the MAC check function.
2779 	 */
2780 	if (!(so->so_state & SS_DEFUNCT) &&
2781 	    !(so->so_state & SS_ISCONNECTED) &&
2782 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2783 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2784 		goto out;
2785 	}
2786 #endif /* MAC_SOCKET_SUBSET */
2787 
2788 	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2789 
2790 	for (i = 0; i < uap->cnt; i++) {
2791 		struct recv_msg_elem *recv_msg_elem;
2792 		uio_t auio;
2793 		sockaddr_ref_ref_t psa;
2794 		struct mbuf **controlp;
2795 
2796 		recv_msg_elem = recv_msg_array + i;
2797 		auio = recv_msg_elem->uio;
2798 
2799 		/*
2800 		 * Do not block if we got at least one packet
2801 		 */
2802 		if (i > 0) {
2803 			flags |= MSG_DONTWAIT;
2804 		}
2805 
2806 		psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2807 		    &recv_msg_elem->psa : NULL;
2808 		controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2809 		    &recv_msg_elem->controlp : NULL;
2810 
2811 		error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2812 		    auio, NULL, controlp, &flags);
2813 		if (error) {
2814 			break;
2815 		}
2816 		/*
2817 		 * We have some data
2818 		 */
2819 		recv_msg_elem->which |= SOCK_MSG_DATA;
2820 		/*
2821 		 * Set the messages flags for this packet
2822 		 */
2823 		flags &= ~MSG_DONTWAIT;
2824 		recv_msg_elem->flags = flags;
2825 		/*
2826 		 * Stop on partial copy
2827 		 */
2828 		if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2829 			break;
2830 		}
2831 	}
2832 
2833 	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2834 
2835 	if (error) {
2836 		if (len_after != len_before && (error == ERESTART ||
2837 		    error == EINTR || error == EWOULDBLOCK)) {
2838 			error = 0;
2839 		} else {
2840 			goto out;
2841 		}
2842 	}
2843 
2844 	uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2845 	    uap->cnt, user_msg_x, recv_msg_array, &error);
2846 	if (error != 0) {
2847 		goto out;
2848 	}
2849 
2850 	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2851 	if (error) {
2852 		DBG_PRINTF("%s copyout() failed", __func__);
2853 		goto out;
2854 	}
2855 	*retval = (int)(uiocnt);
2856 
2857 out:
2858 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
2859 	free_recv_msg_array(recv_msg_array, uap->cnt);
2860 	kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);
2861 
2862 	return error;
2863 }
2864 
2865 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2866 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2867 {
2868 	int error = EOPNOTSUPP;
2869 	socket_ref_t so;
2870 	size_t size_of_msghdrx;
2871 	caddr_t msghdrxp;
2872 	struct user32_msghdr_x msghdrx32 = {};
2873 	struct user64_msghdr_x msghdrx64 = {};
2874 	int spacetype;
2875 	u_int i;
2876 	uio_t auio = NULL;
2877 	caddr_t src;
2878 	int flags;
2879 	struct mbuf *pkt_list = NULL, *m;
2880 	struct mbuf *addr_list = NULL, *m_addr;
2881 	struct mbuf *ctl_list = NULL, *control;
2882 	u_int pktcnt;
2883 
2884 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2885 
2886 	error = file_socket(uap->s, &so);
2887 	if (error) {
2888 		goto done_no_filedrop;
2889 	}
2890 	if (so == NULL) {
2891 		error = EBADF;
2892 		goto done;
2893 	}
2894 
2895 #if CONFIG_MACF_SOCKET_SUBSET
2896 	/*
2897 	 * We check the state without holding the socket lock;
2898 	 * if a race condition occurs, it would simply result
2899 	 * in an extra call to the MAC check function.
2900 	 */
2901 	if (!(so->so_state & SS_DEFUNCT) &&
2902 	    !(so->so_state & SS_ISCONNECTED) &&
2903 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2904 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2905 		goto done;
2906 	}
2907 #endif /* MAC_SOCKET_SUBSET */
2908 
2909 	/*
2910 	 * With soreceive_m_list, all packets must be uniform, with address and
2911 	 * control as they are returned in parallel lists and it's only guaranteed
2912 	 * when pru_send_list is supported
2913 	 */
2914 	if (do_recvmsg_x_donttrunc != 0 || (so->so_options & SO_DONTTRUNC)) {
2915 		error = recvmsg_x_array(p, so, uap, retval);
2916 		goto done;
2917 	}
2918 
2919 	/*
2920 	 * Input parameter range check
2921 	 */
2922 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2923 		error = EINVAL;
2924 		goto done;
2925 	}
2926 	if (uap->cnt > somaxrecvmsgx) {
2927 		uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2928 	}
2929 
2930 	if (IS_64BIT_PROCESS(p)) {
2931 		msghdrxp = (caddr_t)&msghdrx64;
2932 		size_of_msghdrx = sizeof(struct user64_msghdr_x);
2933 		spacetype = UIO_USERSPACE64;
2934 	} else {
2935 		msghdrxp = (caddr_t)&msghdrx32;
2936 		size_of_msghdrx = sizeof(struct user32_msghdr_x);
2937 		spacetype = UIO_USERSPACE32;
2938 	}
2939 	src = (caddr_t)uap->msgp;
2940 
2941 	flags = uap->flags;
2942 
2943 	/*
2944 	 * Only allow MSG_DONTWAIT
2945 	 */
2946 	if ((flags & ~(MSG_DONTWAIT | MSG_NBIO)) != 0) {
2947 		error = EINVAL;
2948 		goto done;
2949 	}
2950 
2951 	/*
2952 	 * Receive list of packet in a single call
2953 	 */
2954 	pktcnt = uap->cnt;
2955 	error = soreceive_m_list(so, &pktcnt, &addr_list, &pkt_list, &ctl_list,
2956 	    &flags);
2957 	if (error != 0) {
2958 		if (pktcnt != 0 && (error == ERESTART ||
2959 		    error == EINTR || error == EWOULDBLOCK)) {
2960 			error = 0;
2961 		} else {
2962 			goto done;
2963 		}
2964 	}
2965 
2966 	m_addr = addr_list;
2967 	m = pkt_list;
2968 	control = ctl_list;
2969 
2970 	for (i = 0; i < pktcnt; i++) {
2971 		struct user_msghdr user_msg;
2972 		ssize_t len;
2973 		struct user_iovec *iovp;
2974 		struct mbuf *n;
2975 
2976 		if (m->m_type != MT_OOBDATA && m->m_type != MT_DATA &&
2977 		    m->m_type != MT_HEADER) {
2978 			panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
2979 		}
2980 
2981 		error = copyin((user_addr_t)(src + i * size_of_msghdrx),
2982 		    msghdrxp, size_of_msghdrx);
2983 		if (error) {
2984 			DBG_PRINTF("%s copyin() msghdrx failed %d\n",
2985 			    __func__, error);
2986 			goto done;
2987 		}
2988 		if (spacetype == UIO_USERSPACE64) {
2989 			user_msg.msg_name = msghdrx64.msg_name;
2990 			user_msg.msg_namelen = msghdrx64.msg_namelen;
2991 			user_msg.msg_iov = msghdrx64.msg_iov;
2992 			user_msg.msg_iovlen = msghdrx64.msg_iovlen;
2993 			user_msg.msg_control = msghdrx64.msg_control;
2994 			user_msg.msg_controllen = msghdrx64.msg_controllen;
2995 		} else {
2996 			user_msg.msg_name = msghdrx32.msg_name;
2997 			user_msg.msg_namelen = msghdrx32.msg_namelen;
2998 			user_msg.msg_iov = msghdrx32.msg_iov;
2999 			user_msg.msg_iovlen = msghdrx32.msg_iovlen;
3000 			user_msg.msg_control = msghdrx32.msg_control;
3001 			user_msg.msg_controllen = msghdrx32.msg_controllen;
3002 		}
3003 		user_msg.msg_flags = 0;
3004 		if (user_msg.msg_iovlen <= 0 ||
3005 		    user_msg.msg_iovlen > UIO_MAXIOV) {
3006 			error = EMSGSIZE;
3007 			DBG_PRINTF("%s bad msg_iovlen, error %d\n",
3008 			    __func__, error);
3009 			goto done;
3010 		}
3011 		/*
3012 		 * Attempt to reuse the uio if large enough, otherwise we need
3013 		 * a new one
3014 		 */
3015 		if (auio != NULL) {
3016 			if (auio->uio_max_iovs <= user_msg.msg_iovlen) {
3017 				uio_reset(auio, 0, spacetype, UIO_READ);
3018 			} else {
3019 				uio_free(auio);
3020 				auio = NULL;
3021 			}
3022 		}
3023 		if (auio == NULL) {
3024 			auio = uio_create(user_msg.msg_iovlen, 0, spacetype,
3025 			    UIO_READ);
3026 			if (auio == NULL) {
3027 				error = ENOBUFS;
3028 				DBG_PRINTF("%s uio_create() failed %d\n",
3029 				    __func__, error);
3030 				goto done;
3031 			}
3032 		}
3033 		/*
3034 		 * get location of iovecs within the uio then copy the iovecs
3035 		 * from user space.
3036 		 */
3037 		iovp = uio_iovsaddr(auio);
3038 		if (iovp == NULL) {
3039 			error = ENOMEM;
3040 			DBG_PRINTF("%s uio_iovsaddr() failed %d\n",
3041 			    __func__, error);
3042 			goto done;
3043 		}
3044 		error = copyin_user_iovec_array(user_msg.msg_iov,
3045 		    spacetype, user_msg.msg_iovlen, iovp);
3046 		if (error != 0) {
3047 			DBG_PRINTF("%s copyin_user_iovec_array() failed %d\n",
3048 			    __func__, error);
3049 			goto done;
3050 		}
3051 		error = uio_calculateresid(auio);
3052 		if (error != 0) {
3053 			DBG_PRINTF("%s uio_calculateresid() failed %d\n",
3054 			    __func__, error);
3055 			goto done;
3056 		}
3057 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
3058 
3059 		len = uio_resid(auio);
3060 		for (n = m; n != NULL; n = n->m_next) {
3061 			user_ssize_t resid = uio_resid(auio);
3062 			if (resid < n->m_len) {
3063 				error = uiomove(mtod(n, caddr_t), (int)n->m_len, auio);
3064 				if (error != 0) {
3065 					DBG_PRINTF("%s uiomove() failed\n",
3066 					    __func__);
3067 					goto done;
3068 				}
3069 				flags |= MSG_TRUNC;
3070 				break;
3071 			}
3072 
3073 			error = uiomove(mtod(n, caddr_t), (int)n->m_len, auio);
3074 			if (error != 0) {
3075 				DBG_PRINTF("%s uiomove() failed\n",
3076 				    __func__);
3077 				goto done;
3078 			}
3079 		}
3080 		len -= uio_resid(auio);
3081 
3082 		if (user_msg.msg_name != 0 && user_msg.msg_namelen != 0) {
3083 			error = copyout_maddr(m_addr, user_msg.msg_name,
3084 			    &user_msg.msg_namelen);
3085 			if (error) {
3086 				DBG_PRINTF("%s copyout_maddr()  failed\n",
3087 				    __func__);
3088 				goto done;
3089 			}
3090 		}
3091 		if (user_msg.msg_control != 0 && user_msg.msg_controllen != 0) {
3092 			error = copyout_control(p, control,
3093 			    user_msg.msg_control, &user_msg.msg_controllen,
3094 			    &user_msg.msg_flags, so);
3095 			if (error) {
3096 				DBG_PRINTF("%s copyout_control() failed\n",
3097 				    __func__);
3098 				goto done;
3099 			}
3100 		}
3101 		/*
3102 		 * Note: the original msg_iovlen and msg_iov do not change
3103 		 */
3104 		if (spacetype == UIO_USERSPACE64) {
3105 			msghdrx64.msg_flags = user_msg.msg_flags;
3106 			msghdrx64.msg_controllen = user_msg.msg_controllen;
3107 			msghdrx64.msg_control = user_msg.msg_control;
3108 			msghdrx64.msg_namelen = user_msg.msg_namelen;
3109 			msghdrx64.msg_name = user_msg.msg_name;
3110 			msghdrx64.msg_datalen = len;
3111 		} else {
3112 			msghdrx32.msg_flags = user_msg.msg_flags;
3113 			msghdrx32.msg_controllen = user_msg.msg_controllen;
3114 			msghdrx32.msg_control = (user32_addr_t) user_msg.msg_control;
3115 			msghdrx32.msg_name = user_msg.msg_namelen;
3116 			msghdrx32.msg_name = (user32_addr_t) user_msg.msg_name;
3117 			msghdrx32.msg_datalen = (user32_size_t) len;
3118 		}
3119 		error = copyout(msghdrxp,
3120 		    (user_addr_t)(src + i * size_of_msghdrx),
3121 		    size_of_msghdrx);
3122 		if (error) {
3123 			DBG_PRINTF("%s copyout() msghdrx failed\n", __func__);
3124 			goto done;
3125 		}
3126 
3127 		m = m->m_nextpkt;
3128 		if (control != NULL) {
3129 			control = control->m_nextpkt;
3130 		}
3131 		if (m_addr != NULL) {
3132 			m_addr = m_addr->m_nextpkt;
3133 		}
3134 	}
3135 
3136 	uap->flags = flags;
3137 
3138 	*retval = (int)i;
3139 done:
3140 	file_drop(uap->s);
3141 
3142 done_no_filedrop:
3143 	if (pkt_list != NULL) {
3144 		m_freem_list(pkt_list);
3145 	}
3146 	if (addr_list != NULL) {
3147 		m_freem_list(addr_list);
3148 	}
3149 	if (ctl_list != NULL) {
3150 		m_freem_list(ctl_list);
3151 	}
3152 	if (auio != NULL) {
3153 		uio_free(auio);
3154 	}
3155 
3156 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3157 
3158 	return error;
3159 }
3160 
3161 /*
3162  * Returns:	0			Success
3163  *		EBADF
3164  *	file_socket:ENOTSOCK
3165  *	file_socket:EBADF
3166  *	soshutdown:EINVAL
3167  *	soshutdown:ENOTCONN
3168  *	soshutdown:EADDRNOTAVAIL[TCP]
3169  *	soshutdown:ENOBUFS[TCP]
3170  *	soshutdown:EMSGSIZE[TCP]
3171  *	soshutdown:EHOSTUNREACH[TCP]
3172  *	soshutdown:ENETUNREACH[TCP]
3173  *	soshutdown:ENETDOWN[TCP]
3174  *	soshutdown:ENOMEM[TCP]
3175  *	soshutdown:EACCES[TCP]
3176  *	soshutdown:EMSGSIZE[TCP]
3177  *	soshutdown:ENOBUFS[TCP]
3178  *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
3179  *	soshutdown:???			[other protocol families]
3180  */
3181 /* ARGSUSED */
3182 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)3183 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
3184     __unused int32_ref_t retval)
3185 {
3186 	socket_ref_t so;
3187 	int error;
3188 
3189 	AUDIT_ARG(fd, uap->s);
3190 	error = file_socket(uap->s, &so);
3191 	if (error) {
3192 		return error;
3193 	}
3194 	if (so == NULL) {
3195 		error = EBADF;
3196 		goto out;
3197 	}
3198 	error =  soshutdown((struct socket *)so, uap->how);
3199 out:
3200 	file_drop(uap->s);
3201 	return error;
3202 }
3203 
3204 /*
3205  * Returns:	0			Success
3206  *		EFAULT
3207  *		EINVAL
3208  *		EACCES			Mandatory Access Control failure
3209  *	file_socket:ENOTSOCK
3210  *	file_socket:EBADF
3211  *	sosetopt:EINVAL
3212  *	sosetopt:ENOPROTOOPT
3213  *	sosetopt:ENOBUFS
3214  *	sosetopt:EDOM
3215  *	sosetopt:EFAULT
3216  *	sosetopt:EOPNOTSUPP[AF_UNIX]
3217  *	sosetopt:???
3218  */
3219 /* ARGSUSED */
3220 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)3221 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
3222     __unused int32_ref_t retval)
3223 {
3224 	socket_ref_t so;
3225 	struct sockopt sopt;
3226 	int error;
3227 
3228 	AUDIT_ARG(fd, uap->s);
3229 	if (uap->val == 0 && uap->valsize != 0) {
3230 		return EFAULT;
3231 	}
3232 	/* No bounds checking on size (it's unsigned) */
3233 
3234 	error = file_socket(uap->s, &so);
3235 	if (error) {
3236 		return error;
3237 	}
3238 
3239 	sopt.sopt_dir = SOPT_SET;
3240 	sopt.sopt_level = uap->level;
3241 	sopt.sopt_name = uap->name;
3242 	sopt.sopt_val = uap->val;
3243 	sopt.sopt_valsize = uap->valsize;
3244 	sopt.sopt_p = p;
3245 
3246 	if (so == NULL) {
3247 		error = EINVAL;
3248 		goto out;
3249 	}
3250 #if CONFIG_MACF_SOCKET_SUBSET
3251 	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
3252 	    &sopt)) != 0) {
3253 		goto out;
3254 	}
3255 #endif /* MAC_SOCKET_SUBSET */
3256 	error = sosetoptlock(so, &sopt, 1);     /* will lock socket */
3257 out:
3258 	file_drop(uap->s);
3259 	return error;
3260 }
3261 
3262 
3263 
3264 /*
3265  * Returns:	0			Success
3266  *		EINVAL
3267  *		EBADF
3268  *		EACCES			Mandatory Access Control failure
3269  *	copyin:EFAULT
3270  *	copyout:EFAULT
3271  *	file_socket:ENOTSOCK
3272  *	file_socket:EBADF
3273  *	sogetopt:???
3274  */
3275 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)3276 getsockopt(proc_ref_t p, struct getsockopt_args  *uap,
3277     __unused int32_ref_t retval)
3278 {
3279 	int             error;
3280 	socklen_t       valsize;
3281 	struct sockopt  sopt;
3282 	socket_ref_t so;
3283 
3284 	error = file_socket(uap->s, &so);
3285 	if (error) {
3286 		return error;
3287 	}
3288 	if (uap->val) {
3289 		error = copyin(uap->avalsize, (caddr_t)&valsize,
3290 		    sizeof(valsize));
3291 		if (error) {
3292 			goto out;
3293 		}
3294 		/* No bounds checking on size (it's unsigned) */
3295 	} else {
3296 		valsize = 0;
3297 	}
3298 	sopt.sopt_dir = SOPT_GET;
3299 	sopt.sopt_level = uap->level;
3300 	sopt.sopt_name = uap->name;
3301 	sopt.sopt_val = uap->val;
3302 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
3303 	sopt.sopt_p = p;
3304 
3305 	if (so == NULL) {
3306 		error = EBADF;
3307 		goto out;
3308 	}
3309 #if CONFIG_MACF_SOCKET_SUBSET
3310 	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
3311 	    &sopt)) != 0) {
3312 		goto out;
3313 	}
3314 #endif /* MAC_SOCKET_SUBSET */
3315 	error = sogetoptlock((struct socket *)so, &sopt, 1);    /* will lock */
3316 	if (error == 0) {
3317 		valsize = (socklen_t)sopt.sopt_valsize;
3318 		error = copyout((caddr_t)&valsize, uap->avalsize,
3319 		    sizeof(valsize));
3320 	}
3321 out:
3322 	file_drop(uap->s);
3323 	return error;
3324 }
3325 
3326 
3327 /*
3328  * Get socket name.
3329  *
3330  * Returns:	0			Success
3331  *		EBADF
3332  *	file_socket:ENOTSOCK
3333  *	file_socket:EBADF
3334  *	copyin:EFAULT
3335  *	copyout:EFAULT
3336  *	<pru_sockaddr>:ENOBUFS[TCP]
3337  *	<pru_sockaddr>:ECONNRESET[TCP]
3338  *	<pru_sockaddr>:EINVAL[AF_UNIX]
3339  *	<sf_getsockname>:???
3340  */
3341 /* ARGSUSED */
3342 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)3343 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
3344     __unused int32_ref_t retval)
3345 {
3346 	socket_ref_t so;
3347 	sockaddr_ref_t  sa;
3348 	socklen_t len;
3349 	socklen_t sa_len;
3350 	int error;
3351 
3352 	error = file_socket(uap->fdes, &so);
3353 	if (error) {
3354 		return error;
3355 	}
3356 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3357 	if (error) {
3358 		goto out;
3359 	}
3360 	if (so == NULL) {
3361 		error = EBADF;
3362 		goto out;
3363 	}
3364 	sa = 0;
3365 	socket_lock(so, 1);
3366 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
3367 	if (error == 0) {
3368 		error = sflt_getsockname(so, &sa);
3369 		if (error == EJUSTRETURN) {
3370 			error = 0;
3371 		}
3372 	}
3373 	socket_unlock(so, 1);
3374 	if (error) {
3375 		goto bad;
3376 	}
3377 	if (sa == 0) {
3378 		len = 0;
3379 		goto gotnothing;
3380 	}
3381 
3382 	sa_len = sa->sa_len;
3383 	len = MIN(len, sa_len);
3384 	error = copyout((caddr_t)sa, uap->asa, len);
3385 	if (error) {
3386 		goto bad;
3387 	}
3388 	/* return the actual, untruncated address length */
3389 	len = sa_len;
3390 gotnothing:
3391 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3392 bad:
3393 	free_sockaddr(sa);
3394 out:
3395 	file_drop(uap->fdes);
3396 	return error;
3397 }
3398 
3399 /*
3400  * Get name of peer for connected socket.
3401  *
3402  * Returns:	0			Success
3403  *		EBADF
3404  *		EINVAL
3405  *		ENOTCONN
3406  *	file_socket:ENOTSOCK
3407  *	file_socket:EBADF
3408  *	copyin:EFAULT
3409  *	copyout:EFAULT
3410  *	<pru_peeraddr>:???
3411  *	<sf_getpeername>:???
3412  */
3413 /* ARGSUSED */
3414 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)3415 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
3416     __unused int32_ref_t retval)
3417 {
3418 	socket_ref_t so;
3419 	sockaddr_ref_t  sa;
3420 	socklen_t len;
3421 	socklen_t sa_len;
3422 	int error;
3423 
3424 	error = file_socket(uap->fdes, &so);
3425 	if (error) {
3426 		return error;
3427 	}
3428 	if (so == NULL) {
3429 		error = EBADF;
3430 		goto out;
3431 	}
3432 
3433 	socket_lock(so, 1);
3434 
3435 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
3436 	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
3437 		/* the socket has been shutdown, no more getpeername's */
3438 		socket_unlock(so, 1);
3439 		error = EINVAL;
3440 		goto out;
3441 	}
3442 
3443 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
3444 		socket_unlock(so, 1);
3445 		error = ENOTCONN;
3446 		goto out;
3447 	}
3448 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3449 	if (error) {
3450 		socket_unlock(so, 1);
3451 		goto out;
3452 	}
3453 	sa = 0;
3454 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
3455 	if (error == 0) {
3456 		error = sflt_getpeername(so, &sa);
3457 		if (error == EJUSTRETURN) {
3458 			error = 0;
3459 		}
3460 	}
3461 	socket_unlock(so, 1);
3462 	if (error) {
3463 		goto bad;
3464 	}
3465 	if (sa == 0) {
3466 		len = 0;
3467 		goto gotnothing;
3468 	}
3469 	sa_len = sa->sa_len;
3470 	len = MIN(len, sa_len);
3471 	error = copyout(sa, uap->asa, len);
3472 	if (error) {
3473 		goto bad;
3474 	}
3475 	/* return the actual, untruncated address length */
3476 	len = sa_len;
3477 gotnothing:
3478 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3479 bad:
3480 	free_sockaddr(sa);
3481 out:
3482 	file_drop(uap->fdes);
3483 	return error;
3484 }
3485 
3486 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)3487 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
3488 {
3489 	sockaddr_ref_t sa;
3490 	struct mbuf *m;
3491 	int error;
3492 	socklen_t alloc_buflen = buflen;
3493 
3494 	if (buflen > INT_MAX / 2) {
3495 		return EINVAL;
3496 	}
3497 	if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
3498 	    buflen < offsetof(struct sockaddr, sa_data[0]))) {
3499 		return EINVAL;
3500 	}
3501 	if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
3502 		return EINVAL;
3503 	}
3504 
3505 #ifdef __LP64__
3506 	/*
3507 	 * The fd's in the buffer must expand to be pointers, thus we need twice
3508 	 * as much space
3509 	 */
3510 	if (type == MT_CONTROL) {
3511 		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3512 		    sizeof(struct cmsghdr);
3513 	}
3514 #endif
3515 	if (alloc_buflen > MLEN) {
3516 		if (type == MT_SONAME && alloc_buflen <= 112) {
3517 			alloc_buflen = MLEN;    /* unix domain compat. hack */
3518 		} else if (alloc_buflen > MCLBYTES) {
3519 			return EINVAL;
3520 		}
3521 	}
3522 	m = m_get(M_WAIT, type);
3523 	if (m == NULL) {
3524 		return ENOBUFS;
3525 	}
3526 	if (alloc_buflen > MLEN) {
3527 		MCLGET(m, M_WAIT);
3528 		if ((m->m_flags & M_EXT) == 0) {
3529 			m_free(m);
3530 			return ENOBUFS;
3531 		}
3532 	}
3533 	/*
3534 	 * K64: We still copyin the original buflen because it gets expanded
3535 	 * later and we lie about the size of the mbuf because it only affects
3536 	 * unp_* functions
3537 	 */
3538 	m->m_len = buflen;
3539 	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
3540 	if (error) {
3541 		(void) m_free(m);
3542 	} else {
3543 		*mp = m;
3544 		if (type == MT_SONAME) {
3545 			VERIFY(buflen <= SOCK_MAXADDRLEN);
3546 			sa = mtod(m, sockaddr_ref_t);
3547 			sa->sa_len = (__uint8_t)buflen;
3548 		}
3549 	}
3550 	return error;
3551 }
3552 
3553 /*
3554  * Given a user_addr_t of length len, allocate and fill out a *sa.
3555  *
3556  * Returns:	0			Success
3557  *		ENAMETOOLONG		Filename too long
3558  *		EINVAL			Invalid argument
3559  *		ENOMEM			Not enough space
3560  *		copyin:EFAULT		Bad address
3561  */
3562 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3563 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
3564     size_t len, boolean_t translate_unspec)
3565 {
3566 	sockaddr_ref_t  sa;
3567 	int error;
3568 
3569 	if (len > SOCK_MAXADDRLEN) {
3570 		return ENAMETOOLONG;
3571 	}
3572 
3573 	if (len < offsetof(struct sockaddr, sa_data[0])) {
3574 		return EINVAL;
3575 	}
3576 
3577 	sa = (sockaddr_ref_t)alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
3578 
3579 	error = copyin(uaddr, (caddr_t)sa, len);
3580 	if (error) {
3581 		free_sockaddr(sa);
3582 	} else {
3583 		/*
3584 		 * Force sa_family to AF_INET on AF_INET sockets to handle
3585 		 * legacy applications that use AF_UNSPEC (0).  On all other
3586 		 * sockets we leave it unchanged and let the lower layer
3587 		 * handle it.
3588 		 */
3589 		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
3590 		    SOCK_CHECK_DOM(so, PF_INET) &&
3591 		    len == sizeof(struct sockaddr_in)) {
3592 			sa->sa_family = AF_INET;
3593 		}
3594 		VERIFY(len <= SOCK_MAXADDRLEN);
3595 		sa = *&sa;
3596 		sa->sa_len = (__uint8_t)len;
3597 		*namp = sa;
3598 	}
3599 	return error;
3600 }
3601 
3602 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3603 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
3604     user_addr_t uaddr, size_t len, boolean_t translate_unspec)
3605 {
3606 	int error;
3607 
3608 	if (ss == NULL || uaddr == USER_ADDR_NULL ||
3609 	    len < offsetof(struct sockaddr, sa_data[0])) {
3610 		return EINVAL;
3611 	}
3612 
3613 	/*
3614 	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
3615 	 * so the check here is inclusive.
3616 	 */
3617 	if (len > sizeof(*ss)) {
3618 		return ENAMETOOLONG;
3619 	}
3620 
3621 	bzero(ss, sizeof(*ss));
3622 	error = copyin(uaddr, (caddr_t)ss, len);
3623 	if (error == 0) {
3624 		/*
3625 		 * Force sa_family to AF_INET on AF_INET sockets to handle
3626 		 * legacy applications that use AF_UNSPEC (0).  On all other
3627 		 * sockets we leave it unchanged and let the lower layer
3628 		 * handle it.
3629 		 */
3630 		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
3631 		    SOCK_CHECK_DOM(so, PF_INET) &&
3632 		    len == sizeof(struct sockaddr_in)) {
3633 			ss->ss_family = AF_INET;
3634 		}
3635 
3636 		ss->ss_len = (__uint8_t)len;
3637 	}
3638 	return error;
3639 }
3640 
3641 #if DEBUG || DEVELOPMENT
3642 int
internalize_user_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,uio_ref_ptr_t uiop)3643 internalize_user_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3644     u_int count, user_msghdr_x_ptr_t dst, uio_ref_ptr_t uiop)
3645 {
3646 	int error = 0;
3647 	u_int i;
3648 	u_int namecnt = 0;
3649 	u_int ctlcnt = 0;
3650 
3651 	for (i = 0; i < count; i++) {
3652 		uio_t auio;
3653 		struct user_iovec *iovp;
3654 		struct user_msghdr_x *user_msg = dst + i;
3655 
3656 		if (spacetype == UIO_USERSPACE64) {
3657 			const struct user64_msghdr_x *msghdr64;
3658 
3659 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3660 
3661 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3662 			user_msg->msg_namelen = msghdr64->msg_namelen;
3663 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3664 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3665 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3666 			user_msg->msg_controllen = msghdr64->msg_controllen;
3667 			user_msg->msg_flags = msghdr64->msg_flags;
3668 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3669 		} else {
3670 			const struct user32_msghdr_x *msghdr32;
3671 
3672 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3673 
3674 			user_msg->msg_name = msghdr32->msg_name;
3675 			user_msg->msg_namelen = msghdr32->msg_namelen;
3676 			user_msg->msg_iov = msghdr32->msg_iov;
3677 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3678 			user_msg->msg_control = msghdr32->msg_control;
3679 			user_msg->msg_controllen = msghdr32->msg_controllen;
3680 			user_msg->msg_flags = msghdr32->msg_flags;
3681 			user_msg->msg_datalen = msghdr32->msg_datalen;
3682 		}
3683 
3684 		if (user_msg->msg_iovlen <= 0 ||
3685 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3686 			error = EMSGSIZE;
3687 			goto done;
3688 		}
3689 		auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
3690 		    direction);
3691 		if (auio == NULL) {
3692 			error = ENOMEM;
3693 			goto done;
3694 		}
3695 		uiop[i] = auio;
3696 
3697 		iovp = uio_iovsaddr(auio);
3698 		if (iovp == NULL) {
3699 			error = ENOMEM;
3700 			goto done;
3701 		}
3702 		error = copyin_user_iovec_array(user_msg->msg_iov,
3703 		    spacetype, user_msg->msg_iovlen, iovp);
3704 		if (error) {
3705 			goto done;
3706 		}
3707 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3708 
3709 		error = uio_calculateresid(auio);
3710 		if (error) {
3711 			goto done;
3712 		}
3713 		user_msg->msg_datalen = uio_resid(auio);
3714 
3715 		if (user_msg->msg_name && user_msg->msg_namelen) {
3716 			namecnt++;
3717 		}
3718 		if (user_msg->msg_control && user_msg->msg_controllen) {
3719 			ctlcnt++;
3720 		}
3721 	}
3722 done:
3723 
3724 	return error;
3725 }
3726 #endif /* DEBUG || DEVELOPMENT */
3727 
3728 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3729 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3730     u_int count, user_msghdr_x_ptr_t dst,
3731     recv_msg_elem_ptr_t recv_msg_array)
3732 {
3733 	int error = 0;
3734 	u_int i;
3735 
3736 	for (i = 0; i < count; i++) {
3737 		struct user_iovec *iovp;
3738 		struct user_msghdr_x *user_msg = dst + i;
3739 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3740 
3741 		if (spacetype == UIO_USERSPACE64) {
3742 			const struct user64_msghdr_x *msghdr64;
3743 
3744 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3745 
3746 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3747 			user_msg->msg_namelen = msghdr64->msg_namelen;
3748 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3749 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3750 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3751 			user_msg->msg_controllen = msghdr64->msg_controllen;
3752 			user_msg->msg_flags = msghdr64->msg_flags;
3753 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3754 		} else {
3755 			const struct user32_msghdr_x *msghdr32;
3756 
3757 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3758 
3759 			user_msg->msg_name = msghdr32->msg_name;
3760 			user_msg->msg_namelen = msghdr32->msg_namelen;
3761 			user_msg->msg_iov = msghdr32->msg_iov;
3762 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3763 			user_msg->msg_control = msghdr32->msg_control;
3764 			user_msg->msg_controllen = msghdr32->msg_controllen;
3765 			user_msg->msg_flags = msghdr32->msg_flags;
3766 			user_msg->msg_datalen = msghdr32->msg_datalen;
3767 		}
3768 
3769 		if (user_msg->msg_iovlen <= 0 ||
3770 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3771 			error = EMSGSIZE;
3772 			goto done;
3773 		}
3774 		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3775 		    spacetype, direction);
3776 		if (recv_msg_elem->uio == NULL) {
3777 			error = ENOMEM;
3778 			goto done;
3779 		}
3780 
3781 		iovp = uio_iovsaddr(recv_msg_elem->uio);
3782 		if (iovp == NULL) {
3783 			error = ENOMEM;
3784 			goto done;
3785 		}
3786 		error = copyin_user_iovec_array(user_msg->msg_iov,
3787 		    spacetype, user_msg->msg_iovlen, iovp);
3788 		if (error) {
3789 			goto done;
3790 		}
3791 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3792 
3793 		error = uio_calculateresid(recv_msg_elem->uio);
3794 		if (error) {
3795 			goto done;
3796 		}
3797 		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3798 
3799 		if (user_msg->msg_name && user_msg->msg_namelen) {
3800 			recv_msg_elem->which |= SOCK_MSG_SA;
3801 		}
3802 		if (user_msg->msg_control && user_msg->msg_controllen) {
3803 			recv_msg_elem->which |= SOCK_MSG_CONTROL;
3804 		}
3805 	}
3806 done:
3807 
3808 	return error;
3809 }
3810 
3811 #if DEBUG || DEVELOPMENT
3812 void
externalize_user_msghdr_array(void_ptr_t dst,int spacetype,int direction,u_int count,const user_msghdr_x_ptr_t src,uio_ref_ptr_t uiop)3813 externalize_user_msghdr_array(void_ptr_t dst, int spacetype, int direction,
3814     u_int count, const user_msghdr_x_ptr_t src, uio_ref_ptr_t uiop)
3815 {
3816 #pragma unused(direction)
3817 	u_int i;
3818 
3819 	for (i = 0; i < count; i++) {
3820 		const struct user_msghdr_x *user_msg = src + i;
3821 		uio_t auio = uiop[i];
3822 		user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3823 
3824 		if (spacetype == UIO_USERSPACE64) {
3825 			struct user64_msghdr_x *msghdr64;
3826 
3827 			msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3828 
3829 			msghdr64->msg_flags = user_msg->msg_flags;
3830 			msghdr64->msg_datalen = len;
3831 		} else {
3832 			struct user32_msghdr_x *msghdr32;
3833 
3834 			msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3835 
3836 			msghdr32->msg_flags = user_msg->msg_flags;
3837 			msghdr32->msg_datalen = (user32_size_t)len;
3838 		}
3839 	}
3840 }
3841 #endif /* DEBUG || DEVELOPMENT */
3842 
3843 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3844 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3845     u_int count, user_msghdr_x_ptr_t src,
3846     recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3847 {
3848 	u_int i;
3849 	u_int retcnt = 0;
3850 	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3851 
3852 	*ret_error = 0;
3853 
3854 	for (i = 0; i < count; i++) {
3855 		struct user_msghdr_x *user_msg = src + i;
3856 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3857 		user_ssize_t len = 0;
3858 		int error;
3859 
3860 		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3861 
3862 		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3863 			retcnt++;
3864 
3865 			if (recv_msg_elem->which & SOCK_MSG_SA) {
3866 				error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3867 				    &user_msg->msg_namelen);
3868 				if (error != 0) {
3869 					*ret_error = error;
3870 					return 0;
3871 				}
3872 			}
3873 			if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3874 				error = copyout_control(p, recv_msg_elem->controlp,
3875 				    user_msg->msg_control, &user_msg->msg_controllen,
3876 				    &recv_msg_elem->flags, so);
3877 				if (error != 0) {
3878 					*ret_error = error;
3879 					return 0;
3880 				}
3881 			}
3882 		}
3883 
3884 		if (spacetype == UIO_USERSPACE64) {
3885 			struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3886 
3887 			msghdr64->msg_namelen = user_msg->msg_namelen;
3888 			msghdr64->msg_controllen = user_msg->msg_controllen;
3889 			msghdr64->msg_flags = recv_msg_elem->flags;
3890 			msghdr64->msg_datalen = len;
3891 		} else {
3892 			struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3893 
3894 			msghdr32->msg_namelen = user_msg->msg_namelen;
3895 			msghdr32->msg_controllen = user_msg->msg_controllen;
3896 			msghdr32->msg_flags = recv_msg_elem->flags;
3897 			msghdr32->msg_datalen = (user32_size_t)len;
3898 		}
3899 	}
3900 	return retcnt;
3901 }
3902 
3903 #if DEBUG || DEVELOPMENT
3904 void
free_uio_array(uio_ref_ptr_t uiop,u_int count)3905 free_uio_array(uio_ref_ptr_t uiop, u_int count)
3906 {
3907 	u_int i;
3908 
3909 	for (i = 0; i < count; i++) {
3910 		if (uiop[i] != NULL) {
3911 			uio_free(uiop[i]);
3912 		}
3913 	}
3914 }
3915 #endif /* DEBUG || DEVELOPMENT */
3916 
3917 /* Extern linkage requires using __counted_by instead of bptr */
3918 __private_extern__ user_ssize_t
uio_array_resid(uio_ref_t * __counted_by (count)uiop,u_int count)3919 uio_array_resid(uio_ref_t * __counted_by(count)uiop, u_int count)
3920 {
3921 	user_ssize_t len = 0;
3922 	u_int i;
3923 
3924 	for (i = 0; i < count; i++) {
3925 		struct uio *auio = uiop[i];
3926 
3927 		if (auio != NULL) {
3928 			len += uio_resid(auio);
3929 		}
3930 	}
3931 	return len;
3932 }
3933 
3934 #if DEBUG || DEVELOPMENT
3935 static boolean_t
uio_array_is_valid(uio_ref_ptr_t uiop,u_int count)3936 uio_array_is_valid(uio_ref_ptr_t uiop, u_int count)
3937 {
3938 	user_ssize_t len = 0;
3939 	u_int i;
3940 
3941 	for (i = 0; i < count; i++) {
3942 		struct uio *auio = uiop[i];
3943 
3944 		if (auio != NULL) {
3945 			user_ssize_t resid = uio_resid(auio);
3946 
3947 			/*
3948 			 * Sanity check on the validity of the iovec:
3949 			 * no point of going over sb_max
3950 			 */
3951 			if (resid < 0 || resid > (user_ssize_t)sb_max) {
3952 				return false;
3953 			}
3954 
3955 			len += resid;
3956 			if (len < 0 || len > (user_ssize_t)sb_max) {
3957 				return false;
3958 			}
3959 		}
3960 	}
3961 	return true;
3962 }
3963 #endif /* DEBUG || DEVELOPMENT */
3964 
3965 recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)3966 alloc_recv_msg_array(u_int count)
3967 {
3968 	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3969 }
3970 
3971 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3972 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3973 {
3974 	if (recv_msg_array == NULL) {
3975 		return;
3976 	}
3977 	for (uint32_t i = 0; i < count; i++) {
3978 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3979 
3980 		if (recv_msg_elem->uio != NULL) {
3981 			uio_free(recv_msg_elem->uio);
3982 		}
3983 		free_sockaddr(recv_msg_elem->psa);
3984 		if (recv_msg_elem->controlp != NULL) {
3985 			m_freem(recv_msg_elem->controlp);
3986 		}
3987 	}
3988 	kfree_type(struct recv_msg_elem, count, recv_msg_array);
3989 }
3990 
3991 
3992 /* Extern linkage requires using __counted_by instead of bptr */
3993 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3994 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3995 {
3996 	user_ssize_t len = 0;
3997 	u_int i;
3998 
3999 	for (i = 0; i < count; i++) {
4000 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
4001 
4002 		if (recv_msg_elem->uio != NULL) {
4003 			len += uio_resid(recv_msg_elem->uio);
4004 		}
4005 	}
4006 	return len;
4007 }
4008 
4009 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)4010 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
4011 {
4012 	user_ssize_t len = 0;
4013 	u_int i;
4014 
4015 	for (i = 0; i < count; i++) {
4016 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
4017 
4018 		if (recv_msg_elem->uio != NULL) {
4019 			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
4020 
4021 			/*
4022 			 * Sanity check on the validity of the iovec:
4023 			 * no point of going over sb_max
4024 			 */
4025 			if (resid < 0 || (u_int32_t)resid > sb_max) {
4026 				return 0;
4027 			}
4028 
4029 			len += resid;
4030 			if (len < 0 || (u_int32_t)len > sb_max) {
4031 				return 0;
4032 			}
4033 		}
4034 	}
4035 	return 1;
4036 }
4037 
4038 #if SENDFILE
4039 
#define SFUIOBUFS 64

/*
 * Number of clusters needed to hold `len` bytes, rounding up, for
 * clusters of (1 << M16KCLSHIFT) and (1 << MBIGCLSHIFT) bytes
 * respectively.  `len` is converted to unsigned int first and is
 * expected to be at least 1.
 */
#define HOWMANY_16K(len)        ((((unsigned int)(len) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(len)         ((((unsigned int)(len) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes: SFUIOBUFS shifted left by PGSHIFT */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* The same upper limit expressed as a number of mbuf clusters */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
4052 
4053 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)4054 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
4055     mbuf_ref_ref_t m, boolean_t jumbocl)
4056 {
4057 	unsigned int needed;
4058 
4059 	if (pktlen == 0) {
4060 		panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
4061 	}
4062 
4063 	/*
4064 	 * Try to allocate for the whole thing.  Since we want full control
4065 	 * over the buffer size and be able to accept partial result, we can't
4066 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
4067 	 */
4068 	*m = NULL;
4069 	if (pktlen > MBIGCLBYTES && jumbocl) {
4070 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
4071 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
4072 	}
4073 	if (*m == NULL) {
4074 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
4075 		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
4076 	}
4077 
4078 	/*
4079 	 * Our previous attempt(s) at allocation had failed; the system
4080 	 * may be short on mbufs, and we want to block until they are
4081 	 * available.  This time, ask just for 1 mbuf and don't return
4082 	 * until we get it.
4083 	 */
4084 	if (*m == NULL) {
4085 		needed = 1;
4086 		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
4087 	}
4088 	if (*m == NULL) {
4089 		panic("%s: blocking allocation returned NULL", __func__);
4090 	}
4091 
4092 	*maxchunks = needed;
4093 }
4094 
4095 /*
4096  * sendfile(2).
4097  * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
4098  *	 struct sf_hdtr *hdtr, int flags)
4099  *
4100  * Send a file specified by 'fd' and starting at 'offset' to a socket
4101  * specified by 's'. Send only '*nbytes' of the file or until EOF if
4102  * *nbytes == 0. Optionally add a header and/or trailer to the socket
4103  * output. If specified, write the total number of bytes sent into *nbytes.
4104  */
4105 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)4106 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
4107 {
4108 	fileproc_ref_t  fp;
4109 	vnode_ref_t  vp;
4110 	socket_ref_t so;
4111 	struct writev_nocancel_args nuap;
4112 	user_ssize_t writev_retval;
4113 	struct user_sf_hdtr user_hdtr;
4114 	struct user32_sf_hdtr user32_hdtr;
4115 	struct user64_sf_hdtr user64_hdtr;
4116 	off_t off, xfsize;
4117 	off_t nbytes = 0, sbytes = 0;
4118 	int error = 0;
4119 	size_t sizeof_hdtr;
4120 	off_t file_size;
4121 	struct vfs_context context = *vfs_context_current();
4122 
4123 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
4124 
4125 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
4126 	    0, 0, 0, 0);
4127 
4128 	AUDIT_ARG(fd, uap->fd);
4129 	AUDIT_ARG(value32, uap->s);
4130 
4131 	/*
4132 	 * Do argument checking. Must be a regular file in, stream
4133 	 * type and connected socket out, positive offset.
4134 	 */
4135 	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
4136 		goto done;
4137 	}
4138 	if ((fp->f_flag & FREAD) == 0) {
4139 		error = EBADF;
4140 		goto done1;
4141 	}
4142 	if (vnode_isreg(vp) == 0) {
4143 		error = ENOTSUP;
4144 		goto done1;
4145 	}
4146 	error = file_socket(uap->s, &so);
4147 	if (error) {
4148 		goto done1;
4149 	}
4150 	if (so == NULL) {
4151 		error = EBADF;
4152 		goto done2;
4153 	}
4154 	if (so->so_type != SOCK_STREAM) {
4155 		error = EINVAL;
4156 		goto done2;
4157 	}
4158 	if ((so->so_state & SS_ISCONNECTED) == 0) {
4159 		error = ENOTCONN;
4160 		goto done2;
4161 	}
4162 	if (uap->offset < 0) {
4163 		error = EINVAL;
4164 		goto done2;
4165 	}
4166 	if (uap->nbytes == USER_ADDR_NULL) {
4167 		error = EINVAL;
4168 		goto done2;
4169 	}
4170 	if (uap->flags != 0) {
4171 		error = EINVAL;
4172 		goto done2;
4173 	}
4174 
4175 	context.vc_ucred = fp->fp_glob->fg_cred;
4176 
4177 #if CONFIG_MACF_SOCKET_SUBSET
4178 	/* JMM - fetch connected sockaddr? */
4179 	error = mac_socket_check_send(context.vc_ucred, so, NULL);
4180 	if (error) {
4181 		goto done2;
4182 	}
4183 #endif
4184 
4185 	/*
4186 	 * Get number of bytes to send
4187 	 * Should it applies to size of header and trailer?
4188 	 */
4189 	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
4190 	if (error) {
4191 		goto done2;
4192 	}
4193 
4194 	/*
4195 	 * If specified, get the pointer to the sf_hdtr struct for
4196 	 * any headers/trailers.
4197 	 */
4198 	if (uap->hdtr != USER_ADDR_NULL) {
4199 		caddr_t hdtrp;
4200 
4201 		bzero(&user_hdtr, sizeof(user_hdtr));
4202 		if (is_p_64bit_process) {
4203 			hdtrp = (caddr_t)&user64_hdtr;
4204 			sizeof_hdtr = sizeof(user64_hdtr);
4205 		} else {
4206 			hdtrp = (caddr_t)&user32_hdtr;
4207 			sizeof_hdtr = sizeof(user32_hdtr);
4208 		}
4209 		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
4210 		if (error) {
4211 			goto done2;
4212 		}
4213 		if (is_p_64bit_process) {
4214 			user_hdtr.headers = user64_hdtr.headers;
4215 			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
4216 			user_hdtr.trailers = user64_hdtr.trailers;
4217 			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
4218 		} else {
4219 			user_hdtr.headers = user32_hdtr.headers;
4220 			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
4221 			user_hdtr.trailers = user32_hdtr.trailers;
4222 			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
4223 		}
4224 
4225 		/*
4226 		 * Send any headers. Wimp out and use writev(2).
4227 		 */
4228 		if (user_hdtr.headers != USER_ADDR_NULL) {
4229 			bzero(&nuap, sizeof(struct writev_args));
4230 			nuap.fd = uap->s;
4231 			nuap.iovp = user_hdtr.headers;
4232 			nuap.iovcnt = user_hdtr.hdr_cnt;
4233 			error = writev_nocancel(p, &nuap, &writev_retval);
4234 			if (error) {
4235 				goto done2;
4236 			}
4237 			sbytes += writev_retval;
4238 		}
4239 	}
4240 
4241 	/*
4242 	 * Get the file size for 2 reasons:
4243 	 *  1. We don't want to allocate more mbufs than necessary
4244 	 *  2. We don't want to read past the end of file
4245 	 */
4246 	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
4247 		goto done2;
4248 	}
4249 
4250 	/*
4251 	 * Simply read file data into a chain of mbufs that used with scatter
4252 	 * gather reads. We're not (yet?) setup to use zero copy external
4253 	 * mbufs that point to the file pages.
4254 	 */
4255 	socket_lock(so, 1);
4256 	error = sblock(&so->so_snd, SBL_WAIT);
4257 	if (error) {
4258 		socket_unlock(so, 1);
4259 		goto done2;
4260 	}
4261 	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
4262 		mbuf_ref_t m0 = NULL;
4263 		mbuf_t  m;
4264 		unsigned int    nbufs = SFUIOBUFS, i;
4265 		uio_t   auio;
4266 		UIO_STACKBUF(uio_buf, SFUIOBUFS);               /* 1KB !!! */
4267 		size_t  uiolen;
4268 		user_ssize_t    rlen;
4269 		off_t   pgoff;
4270 		size_t  pktlen;
4271 		boolean_t jumbocl;
4272 
4273 		/*
4274 		 * Calculate the amount to transfer.
4275 		 * Align to round number of pages.
4276 		 * Not to exceed send socket buffer,
4277 		 * the EOF, or the passed in nbytes.
4278 		 */
4279 		xfsize = sbspace(&so->so_snd);
4280 
4281 		if (xfsize <= 0) {
4282 			if (so->so_state & SS_CANTSENDMORE) {
4283 				error = EPIPE;
4284 				goto done3;
4285 			} else if ((so->so_state & SS_NBIO)) {
4286 				error = EAGAIN;
4287 				goto done3;
4288 			} else {
4289 				xfsize = PAGE_SIZE;
4290 			}
4291 		}
4292 
4293 		if (xfsize > SENDFILE_MAX_BYTES) {
4294 			xfsize = SENDFILE_MAX_BYTES;
4295 		} else if (xfsize > PAGE_SIZE) {
4296 			xfsize = trunc_page(xfsize);
4297 		}
4298 		pgoff = off & PAGE_MASK_64;
4299 		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
4300 			xfsize = PAGE_SIZE_64 - pgoff;
4301 		}
4302 		if (nbytes && xfsize > (nbytes - sbytes)) {
4303 			xfsize = nbytes - sbytes;
4304 		}
4305 		if (xfsize <= 0) {
4306 			break;
4307 		}
4308 		if (off + xfsize > file_size) {
4309 			xfsize = file_size - off;
4310 		}
4311 		if (xfsize <= 0) {
4312 			break;
4313 		}
4314 
4315 		/*
4316 		 * Attempt to use larger than system page-size clusters for
4317 		 * large writes only if there is a jumbo cluster pool and
4318 		 * if the socket is marked accordingly.
4319 		 */
4320 		jumbocl = sosendjcl && njcl > 0 &&
4321 		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
4322 
4323 		socket_unlock(so, 0);
4324 		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
4325 		pktlen = mbuf_pkthdr_maxlen(m0);
4326 		if (pktlen < (size_t)xfsize) {
4327 			xfsize = pktlen;
4328 		}
4329 
4330 		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
4331 		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
4332 		if (auio == NULL) {
4333 			DBG_PRINTF("sendfile failed. nbufs = %d. %s", nbufs,
4334 			    "File a radar related to rdar://10146739.\n");
4335 			mbuf_freem(m0);
4336 			error = ENXIO;
4337 			socket_lock(so, 0);
4338 			goto done3;
4339 		}
4340 
4341 		for (i = 0, m = m0, uiolen = 0;
4342 		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
4343 		    i++, m = mbuf_next(m)) {
4344 			size_t mlen = mbuf_maxlen(m);
4345 
4346 			if (mlen + uiolen > (size_t)xfsize) {
4347 				mlen = xfsize - uiolen;
4348 			}
4349 			mbuf_setlen(m, mlen);
4350 			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
4351 			    mlen);
4352 			uiolen += mlen;
4353 		}
4354 
4355 		if (xfsize != uio_resid(auio)) {
4356 			DBG_PRINTF("sendfile: xfsize: %lld != uio_resid(auio): "
4357 			    "%lld\n", xfsize, (long long)uio_resid(auio));
4358 		}
4359 
4360 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
4361 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
4362 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
4363 		error = fo_read(fp, auio, FOF_OFFSET, &context);
4364 		socket_lock(so, 0);
4365 		if (error != 0) {
4366 			if (uio_resid(auio) != xfsize && (error == ERESTART ||
4367 			    error == EINTR || error == EWOULDBLOCK)) {
4368 				error = 0;
4369 			} else {
4370 				mbuf_freem(m0);
4371 				goto done3;
4372 			}
4373 		}
4374 		xfsize -= uio_resid(auio);
4375 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
4376 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
4377 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
4378 
4379 		if (xfsize == 0) {
4380 			break;
4381 		}
4382 		if (xfsize + off > file_size) {
4383 			DBG_PRINTF("sendfile: xfsize: %lld + off: %lld > file_size:"
4384 			    "%lld\n", xfsize, off, file_size);
4385 		}
4386 		for (i = 0, m = m0, rlen = 0;
4387 		    i < nbufs && m != NULL && rlen < xfsize;
4388 		    i++, m = mbuf_next(m)) {
4389 			size_t mlen = mbuf_maxlen(m);
4390 
4391 			if (rlen + mlen > (size_t)xfsize) {
4392 				mlen = xfsize - rlen;
4393 			}
4394 			mbuf_setlen(m, mlen);
4395 
4396 			rlen += mlen;
4397 		}
4398 		mbuf_pkthdr_setlen(m0, xfsize);
4399 
4400 retry_space:
4401 		/*
4402 		 * Make sure that the socket is still able to take more data.
4403 		 * CANTSENDMORE being true usually means that the connection
4404 		 * was closed. so_error is true when an error was sensed after
4405 		 * a previous send.
4406 		 * The state is checked after the page mapping and buffer
4407 		 * allocation above since those operations may block and make
4408 		 * any socket checks stale. From this point forward, nothing
4409 		 * blocks before the pru_send (or more accurately, any blocking
4410 		 * results in a loop back to here to re-check).
4411 		 */
4412 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
4413 			if (so->so_state & SS_CANTSENDMORE) {
4414 				error = EPIPE;
4415 			} else {
4416 				error = so->so_error;
4417 				so->so_error = 0;
4418 			}
4419 			m_freem(m0);
4420 			goto done3;
4421 		}
4422 		/*
4423 		 * Wait for socket space to become available. We do this just
4424 		 * after checking the connection state above in order to avoid
4425 		 * a race condition with sbwait().
4426 		 */
4427 		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
4428 			if (so->so_state & SS_NBIO) {
4429 				m_freem(m0);
4430 				error = EAGAIN;
4431 				goto done3;
4432 			}
4433 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4434 			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
4435 			error = sbwait(&so->so_snd);
4436 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4437 			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
4438 			/*
4439 			 * An error from sbwait usually indicates that we've
4440 			 * been interrupted by a signal. If we've sent anything
4441 			 * then return bytes sent, otherwise return the error.
4442 			 */
4443 			if (error) {
4444 				m_freem(m0);
4445 				goto done3;
4446 			}
4447 			goto retry_space;
4448 		}
4449 
4450 		mbuf_ref_t  control = NULL;
4451 		{
4452 			/*
4453 			 * Socket filter processing
4454 			 */
4455 
4456 			error = sflt_data_out(so, NULL, &m0, &control, 0);
4457 			if (error) {
4458 				if (error == EJUSTRETURN) {
4459 					error = 0;
4460 					continue;
4461 				}
4462 				goto done3;
4463 			}
4464 			/*
4465 			 * End Socket filter processing
4466 			 */
4467 		}
4468 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4469 		    uap->s, 0, 0, 0, 0);
4470 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
4471 		    NULL, control, p);
4472 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4473 		    uap->s, 0, 0, 0, 0);
4474 		if (error) {
4475 			goto done3;
4476 		}
4477 	}
4478 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
4479 	/*
4480 	 * Send trailers. Wimp out and use writev(2).
4481 	 */
4482 	if (uap->hdtr != USER_ADDR_NULL &&
4483 	    user_hdtr.trailers != USER_ADDR_NULL) {
4484 		bzero(&nuap, sizeof(struct writev_args));
4485 		nuap.fd = uap->s;
4486 		nuap.iovp = user_hdtr.trailers;
4487 		nuap.iovcnt = user_hdtr.trl_cnt;
4488 		error = writev_nocancel(p, &nuap, &writev_retval);
4489 		if (error) {
4490 			goto done2;
4491 		}
4492 		sbytes += writev_retval;
4493 	}
4494 done2:
4495 	file_drop(uap->s);
4496 done1:
4497 	file_drop(uap->fd);
4498 done:
4499 	if (uap->nbytes != USER_ADDR_NULL) {
4500 		/* XXX this appears bogus for some early failure conditions */
4501 		copyout(&sbytes, uap->nbytes, sizeof(off_t));
4502 	}
4503 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
4504 	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
4505 	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
4506 	return error;
4507 done3:
4508 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
4509 	goto done2;
4510 }
4511 
4512 
4513 #endif /* SENDFILE */
4514