xref: /xnu-8796.121.2/bsd/kern/uipc_syscalls.c (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * sendfile(2) and related extensions:
33  * Copyright (c) 1998, David Greenman. All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64  */
65 /*
66  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67  * support for mandatory and extensible security protections.  This notice
68  * is included in support of clause 2.2 (b) of the Apple Public License,
69  * Version 2.0.
70  */
71 
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 #include <sys/types.h>
96 
97 #include <security/audit/audit.h>
98 
99 #include <sys/kdebug.h>
100 #include <sys/sysproto.h>
101 #include <netinet/in.h>
102 #include <net/route.h>
103 #include <netinet/in_pcb.h>
104 
105 #include <os/log.h>
106 #include <os/ptrtools.h>
107 
108 #include <os/log.h>
109 
110 #if CONFIG_MACF_SOCKET_SUBSET
111 #include <security/mac_framework.h>
112 #endif /* MAC_SOCKET_SUBSET */
113 
/*
 * Field shorthands: `fp->f_flag` expands to `fp->fp_glob->fg_flag` and
 * `fp->f_ops` to `fp->fp_glob->fg_ops`.  Because these are plain textual
 * macros, no local variable or member in this file may be named f_flag
 * or f_ops.
 */
114 #define f_flag fp_glob->fg_flag
115 #define f_ops fp_glob->fg_ops
116 
/*
 * NETDBG_CODE() identifiers in the DBG_NETSOCK class.  The DBG_FNC_* values
 * put a per-function number in bits 8 and up, with a small sub-code in the
 * low bits.  Presumably consumed by kdebug trace points; none of the uses
 * are in the part of the file reviewed here -- confirm at the call sites.
 */
117 #define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
118 #define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
119 #define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
120 #define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
121 #define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
122 #define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
123 #define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
124 #define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
125 #define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
126 #define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
127 #define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
128 #define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
129 #define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
130 #define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
131 #define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
132 #define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))
133 
/*
 * Debug-only helpers.  On DEBUG or DEVELOPMENT builds, addresses are passed
 * through unchanged and DBG_PRINTF() forwards to printf().  On other builds
 * addresses go through VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to an
 * empty statement, so its arguments are not evaluated.
 */
134 #if DEBUG || DEVELOPMENT
135 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
136 #define DBG_PRINTF(...) printf(__VA_ARGS__)
137 #else
138 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
139 #define DBG_PRINTF(...) do { } while (0)
140 #endif
141 
142 /* Forward declarations for referenced types */
/*
 * Each line invokes __CCT_DECLARE_CONSTRAINED_PTR_TYPE(type, basename, kind).
 * NOTE(review): the macro is defined outside this file.  Judging from the
 * names used further down (void_ptr_t, int32_ref_t, socklen_ref_t,
 * connectx_args_ref_t, ...) it appears to produce `<basename>_ptr_t` for
 * __CCT_PTR and `<basename>_ref_t` for __CCT_REF -- confirm against the
 * macro definition.
 */
143 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
144 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
145 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
146 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
147 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
148 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
149 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
150 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
151 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
152 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
153 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
154 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
155 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
156 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
157 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
158 
/*
 * Prototypes for the file-local helpers, declared ahead of their
 * definitions.
 */
/* Common back ends of the send and receive system calls. */
159 static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
160     int, int32_ref_t );
161 static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
162     int32_ref_t);
/*
 * NOTE(review): connectit() and connectitx() are declared here with
 * socket_ref_t but defined below with `struct socket *`; presumably the
 * two spellings name the same type -- confirm.
 */
163 static int connectit(socket_ref_t, sockaddr_ref_t);
/*
 * Copy a user-supplied socket address into the kernel.  getsockaddr()
 * returns the address through a pointer-to-pointer, which callers in this
 * file release with free_sockaddr(); getsockaddr_s() fills storage supplied
 * by the caller.  The trailing boolean_t is the "translate AF_UNSPEC" flag
 * described in connect_nocancel().
 */
164 static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
165     size_t, boolean_t);
166 static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
167     user_addr_t, size_t, boolean_t);
168 #if SENDFILE
169 static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
170     boolean_t);
171 #endif /* SENDFILE */
172 static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
173 static int connectitx(socket_ref_t, sockaddr_ref_t,
174     sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
175     sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
176 static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
177     int_ref_t);
/* Shared implementation behind socket() and socket_delegate(). */
178 static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);
179 
/*
 * Helpers operating on arrays of message headers, uio structures and
 * receive elements.  NOTE(review): presumably used by the sendmsg_x() /
 * recvmsg_x() paths; their definitions and callers are outside the part of
 * the file reviewed here.
 */
180 static int internalize_user_msghdr_array(const void_ptr_t, int, int,
181     u_int count, user_msghdr_x_ptr_t, uio_ref_ptr_t);
182 
183 static void externalize_user_msghdr_array(void_ptr_t, int, int, u_int count,
184     const user_msghdr_x_ptr_t, uio_ref_ptr_t);
185 
186 static void free_uio_array(uio_ref_ptr_t, u_int count);
187 static boolean_t uio_array_is_valid(uio_ref_ptr_t, u_int count);
188 static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
189     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
190 static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
191     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);
192 
193 static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
194 static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
195 static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
196 static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
197     socklen_ref_t, int_ref_t, socket_ref_t);
198 
199 SYSCTL_DECL(_kern_ipc);
200 
/*
 * Tunables registered under the _kern_ipc sysctl node.  Both default to
 * 100 and are writable (CTLFLAG_RW).  NOTE(review): presumably the maximum
 * number of messages accepted per sendmsg_x() / recvmsg_x() call; the code
 * that reads them is outside the part of the file reviewed here.
 */
201 static u_int somaxsendmsgx = 100;
202 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
203     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
204 static u_int somaxrecvmsgx = 100;
205 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
206     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
207 
/*
 * Exported read-only (CTLFLAG_RD), starting at 0.  NOTE(review): looks like
 * an event counter; where it is incremented is not visible here.
 */
208 static u_int missingpktinfo = 0;
209 SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
210     CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");
211 
212 /*
213  * System call interface to the socket abstraction.
214  */
215 
/*
 * File operations vector installed on every descriptor created in this
 * file (see socket_common() and accept_nocancel()); defined elsewhere.
 */
216 extern const struct fileops socketops;
217 
218 /*
219  * Returns:	0			Success
220  *		EACCES			Mandatory Access Control failure
221  *	falloc:ENFILE
222  *	falloc:EMFILE
223  *	falloc:ENOMEM
224  *	socreate:EAFNOSUPPORT
225  *	socreate:EPROTOTYPE
226  *	socreate:EPROTONOSUPPORT
227  *	socreate:ENOBUFS
228  *	socreate:ENOMEM
229  *	socreate:???			[other protocol families, IPSEC]
230  */
231 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)232 socket(proc_ref_t p,
233     struct socket_args *uap,
234     int32_ref_t retval)
235 {
236 	return socket_common(p, uap->domain, uap->type, uap->protocol,
237 	           proc_selfpid(), retval, 0);
238 }
239 
240 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)241 socket_delegate(proc_ref_t p,
242     struct socket_delegate_args *uap,
243     int32_ref_t retval)
244 {
245 	return socket_common(p, uap->domain, uap->type, uap->protocol,
246 	           uap->epid, retval, 1);
247 }
248 
249 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)250 socket_common(proc_ref_t p,
251     int domain,
252     int type,
253     int protocol,
254     pid_t epid,
255     int32_ref_t retval,
256     int delegate)
257 {
258 	socket_ref_t so;
259 	fileproc_ref_t  fp;
260 	int fd, error;
261 
262 	AUDIT_ARG(socket, domain, type, protocol);
263 #if CONFIG_MACF_SOCKET_SUBSET
264 	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
265 	    type, protocol)) != 0) {
266 		return error;
267 	}
268 #endif /* MAC_SOCKET_SUBSET */
269 
270 	if (delegate) {
271 		error = priv_check_cred(kauth_cred_get(),
272 		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
273 		if (error) {
274 			return EACCES;
275 		}
276 	}
277 
278 	error = falloc(p, &fp, &fd, vfs_context_current());
279 	if (error) {
280 		return error;
281 	}
282 	fp->f_flag = FREAD | FWRITE;
283 	fp->f_ops = &socketops;
284 
285 	if (delegate) {
286 		error = socreate_delegate(domain, &so, type, protocol, epid);
287 	} else {
288 		error = socreate(domain, &so, type, protocol);
289 	}
290 
291 	if (error) {
292 		fp_free(p, fd, fp);
293 	} else {
294 		fp_set_data(fp, so);
295 
296 		proc_fdlock(p);
297 		procfdtbl_releasefd(p, fd, NULL);
298 
299 		if (ENTR_SHOULDTRACE) {
300 			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
301 			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
302 		}
303 		fp_drop(p, fd, fp, 1);
304 		proc_fdunlock(p);
305 
306 		*retval = fd;
307 	}
308 	return error;
309 }
310 
311 /*
312  * Returns:	0			Success
313  *		EDESTADDRREQ		Destination address required
314  *		EBADF			Bad file descriptor
315  *		EACCES			Mandatory Access Control failure
316  *	file_socket:ENOTSOCK
317  *	file_socket:EBADF
318  *	getsockaddr:ENAMETOOLONG	Filename too long
319  *	getsockaddr:EINVAL		Invalid argument
320  *	getsockaddr:ENOMEM		Not enough space
321  *	getsockaddr:EFAULT		Bad address
322  *	sobindlock:???
323  */
324 /* ARGSUSED */
325 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)326 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
327 {
328 	struct sockaddr_storage ss;
329 	sockaddr_ref_t  sa = NULL;
330 	socket_ref_t so;
331 	boolean_t want_free = TRUE;
332 	int error;
333 
334 	AUDIT_ARG(fd, uap->s);
335 	error = file_socket(uap->s, &so);
336 	if (error != 0) {
337 		return error;
338 	}
339 	if (so == NULL) {
340 		error = EBADF;
341 		goto out;
342 	}
343 	if (uap->name == USER_ADDR_NULL) {
344 		error = EDESTADDRREQ;
345 		goto out;
346 	}
347 	if (uap->namelen > sizeof(ss)) {
348 		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
349 	} else {
350 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
351 		if (error == 0) {
352 			sa = (sockaddr_ref_t)&ss;
353 			want_free = FALSE;
354 		}
355 	}
356 	if (error != 0) {
357 		goto out;
358 	}
359 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
360 #if CONFIG_MACF_SOCKET_SUBSET
361 	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
362 	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
363 		error = sobindlock(so, sa, 1);  /* will lock socket */
364 	}
365 #else
366 	error = sobindlock(so, sa, 1);          /* will lock socket */
367 #endif /* MAC_SOCKET_SUBSET */
368 	if (want_free) {
369 		free_sockaddr(sa);
370 	}
371 out:
372 	file_drop(uap->s);
373 	return error;
374 }
375 
376 /*
377  * Returns:	0			Success
378  *		EBADF
379  *		EACCES			Mandatory Access Control failure
380  *	file_socket:ENOTSOCK
381  *	file_socket:EBADF
382  *	solisten:EINVAL
383  *	solisten:EOPNOTSUPP
384  *	solisten:???
385  */
386 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)387 listen(__unused proc_ref_t p, struct listen_args *uap,
388     __unused int32_ref_t retval)
389 {
390 	int error;
391 	socket_ref_t so;
392 
393 	AUDIT_ARG(fd, uap->s);
394 	error = file_socket(uap->s, &so);
395 	if (error) {
396 		return error;
397 	}
398 	if (so != NULL)
399 #if CONFIG_MACF_SOCKET_SUBSET
400 	{
401 		error = mac_socket_check_listen(kauth_cred_get(), so);
402 		if (error == 0) {
403 			error = solisten(so, uap->backlog);
404 		}
405 	}
406 #else
407 	{ error = solisten(so, uap->backlog);}
408 #endif /* MAC_SOCKET_SUBSET */
409 	else {
410 		error = EBADF;
411 	}
412 
413 	file_drop(uap->s);
414 	return error;
415 }
416 
417 /*
418  * Returns:	fp_get_ftype:EBADF	Bad file descriptor
419  *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
420  *		:EFAULT			Bad address on copyin/copyout
421  *		:EBADF			Bad file descriptor
422  *		:EOPNOTSUPP		Operation not supported on socket
423  *		:EINVAL			Invalid argument
424  *		:EWOULDBLOCK		Operation would block
425  *		:ECONNABORTED		Connection aborted
426  *		:EINTR			Interrupted function
427  *		:EACCES			Mandatory Access Control failure
428  *		falloc:ENFILE		Too many files open in system
429  *		falloc:EMFILE		Too many open files
430  *		falloc:ENOMEM		Not enough space
431  *		0			Success
432  */
433 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)434 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
435     int32_ref_t retval)
436 {
437 	fileproc_ref_t  fp;
438 	sockaddr_ref_t  sa = NULL;
439 	socklen_t namelen;
440 	int error;
441 	socket_ref_t  head;
442 	socket_ref_t so = NULL;
443 	lck_mtx_t *mutex_held;
444 	int fd = uap->s;
445 	int newfd;
446 	unsigned int fflag;
447 	int dosocklock = 0;
448 
449 	*retval = -1;
450 
451 	AUDIT_ARG(fd, uap->s);
452 
453 	if (uap->name) {
454 		error = copyin(uap->anamelen, (caddr_t)&namelen,
455 		    sizeof(socklen_t));
456 		if (error) {
457 			return error;
458 		}
459 	}
460 	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
461 	if (error) {
462 		return error;
463 	}
464 	head = (struct socket *)fp_get_data(fp);
465 
466 #if CONFIG_MACF_SOCKET_SUBSET
467 	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
468 		goto out;
469 	}
470 #endif /* MAC_SOCKET_SUBSET */
471 
472 	socket_lock(head, 1);
473 
474 	if (head->so_proto->pr_getlock != NULL) {
475 		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
476 		dosocklock = 1;
477 	} else {
478 		mutex_held = head->so_proto->pr_domain->dom_mtx;
479 		dosocklock = 0;
480 	}
481 
482 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
483 		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
484 			error = EOPNOTSUPP;
485 		} else {
486 			/* POSIX: The socket is not accepting connections */
487 			error = EINVAL;
488 		}
489 		socket_unlock(head, 1);
490 		goto out;
491 	}
492 check_again:
493 	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
494 		socket_unlock(head, 1);
495 		error = EWOULDBLOCK;
496 		goto out;
497 	}
498 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
499 		if (head->so_state & SS_CANTRCVMORE) {
500 			head->so_error = ECONNABORTED;
501 			break;
502 		}
503 		if (head->so_usecount < 1) {
504 			panic("accept: head=%p refcount=%d", head,
505 			    head->so_usecount);
506 		}
507 		error = msleep((caddr_t)&head->so_timeo, mutex_held,
508 		    PSOCK | PCATCH, "accept", 0);
509 		if (head->so_usecount < 1) {
510 			panic("accept: 2 head=%p refcount=%d", head,
511 			    head->so_usecount);
512 		}
513 		if ((head->so_state & SS_DRAINING)) {
514 			error = ECONNABORTED;
515 		}
516 		if (error) {
517 			socket_unlock(head, 1);
518 			goto out;
519 		}
520 	}
521 	if (head->so_error) {
522 		error = head->so_error;
523 		head->so_error = 0;
524 		socket_unlock(head, 1);
525 		goto out;
526 	}
527 
528 	/*
529 	 * At this point we know that there is at least one connection
530 	 * ready to be accepted. Remove it from the queue prior to
531 	 * allocating the file descriptor for it since falloc() may
532 	 * block allowing another process to accept the connection
533 	 * instead.
534 	 */
535 	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
536 
537 	so_acquire_accept_list(head, NULL);
538 	if (TAILQ_EMPTY(&head->so_comp)) {
539 		so_release_accept_list(head);
540 		goto check_again;
541 	}
542 
543 	so = TAILQ_FIRST(&head->so_comp);
544 	TAILQ_REMOVE(&head->so_comp, so, so_list);
545 	/*
546 	 * Acquire the lock of the new connection
547 	 * as we may be in the process of receiving
548 	 * a packet that may change its so_state
549 	 * (e.g.: a TCP FIN).
550 	 */
551 	if (dosocklock) {
552 		socket_lock(so, 0);
553 	}
554 	so->so_head = NULL;
555 	so->so_state &= ~SS_COMP;
556 	if (dosocklock) {
557 		socket_unlock(so, 0);
558 	}
559 	head->so_qlen--;
560 	so_release_accept_list(head);
561 
562 	/* unlock head to avoid deadlock with select, keep a ref on head */
563 	socket_unlock(head, 0);
564 
565 #if CONFIG_MACF_SOCKET_SUBSET
566 	/*
567 	 * Pass the pre-accepted socket to the MAC framework. This is
568 	 * cheaper than allocating a file descriptor for the socket,
569 	 * calling the protocol accept callback, and possibly freeing
570 	 * the file descriptor should the MAC check fails.
571 	 */
572 	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
573 		socket_lock(so, 1);
574 		so->so_state &= ~SS_NOFDREF;
575 		socket_unlock(so, 1);
576 		soclose(so);
577 		/* Drop reference on listening socket */
578 		sodereference(head);
579 		goto out;
580 	}
581 #endif /* MAC_SOCKET_SUBSET */
582 
583 	/*
584 	 * Pass the pre-accepted socket to any interested socket filter(s).
585 	 * Upon failure, the socket would have been closed by the callee.
586 	 */
587 	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
588 		/* Drop reference on listening socket */
589 		sodereference(head);
590 		/* Propagate socket filter's error code to the caller */
591 		goto out;
592 	}
593 
594 	fflag = fp->f_flag;
595 	error = falloc(p, &fp, &newfd, vfs_context_current());
596 	if (error) {
597 		/*
598 		 * Probably ran out of file descriptors.
599 		 *
600 		 * <rdar://problem/8554930>
601 		 * Don't put this back on the socket like we used to, that
602 		 * just causes the client to spin. Drop the socket.
603 		 */
604 		socket_lock(so, 1);
605 		so->so_state &= ~SS_NOFDREF;
606 		socket_unlock(so, 1);
607 		soclose(so);
608 		sodereference(head);
609 		goto out;
610 	}
611 	*retval = newfd;
612 	fp->f_flag = fflag;
613 	fp->f_ops = &socketops;
614 	fp_set_data(fp, so);
615 
616 	socket_lock(head, 0);
617 	if (dosocklock) {
618 		socket_lock(so, 1);
619 	}
620 
621 	/* Sync socket non-blocking/async state with file flags */
622 	if (fp->f_flag & FNONBLOCK) {
623 		so->so_state |= SS_NBIO;
624 	} else {
625 		so->so_state &= ~SS_NBIO;
626 	}
627 
628 	if (fp->f_flag & FASYNC) {
629 		so->so_state |= SS_ASYNC;
630 		so->so_rcv.sb_flags |= SB_ASYNC;
631 		so->so_snd.sb_flags |= SB_ASYNC;
632 	} else {
633 		so->so_state &= ~SS_ASYNC;
634 		so->so_rcv.sb_flags &= ~SB_ASYNC;
635 		so->so_snd.sb_flags &= ~SB_ASYNC;
636 	}
637 
638 	(void) soacceptlock(so, &sa, 0);
639 	socket_unlock(head, 1);
640 	if (sa == NULL) {
641 		namelen = 0;
642 		if (uap->name) {
643 			goto gotnoname;
644 		}
645 		error = 0;
646 		goto releasefd;
647 	}
648 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
649 
650 	if (uap->name) {
651 		socklen_t       sa_len;
652 
653 		/* save sa_len before it is destroyed */
654 		sa_len = sa->sa_len;
655 		namelen = MIN(namelen, sa_len);
656 		error = copyout(sa, uap->name, namelen);
657 		if (!error) {
658 			/* return the actual, untruncated address length */
659 			namelen = sa_len;
660 		}
661 gotnoname:
662 		error = copyout((caddr_t)&namelen, uap->anamelen,
663 		    sizeof(socklen_t));
664 	}
665 	free_sockaddr(sa);
666 
667 releasefd:
668 	/*
669 	 * If the socket has been marked as inactive by sosetdefunct(),
670 	 * disallow further operations on it.
671 	 */
672 	if (so->so_flags & SOF_DEFUNCT) {
673 		sodefunct(current_proc(), so,
674 		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
675 	}
676 
677 	if (dosocklock) {
678 		socket_unlock(so, 1);
679 	}
680 
681 	proc_fdlock(p);
682 	procfdtbl_releasefd(p, newfd, NULL);
683 	fp_drop(p, newfd, fp, 1);
684 	proc_fdunlock(p);
685 
686 out:
687 	if (error == 0 && ENTR_SHOULDTRACE) {
688 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
689 		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
690 	}
691 
692 	file_drop(fd);
693 	return error;
694 }
695 
696 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)697 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
698 {
699 	__pthread_testcancel(1);
700 	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
701 	           retval);
702 }
703 
704 /*
705  * Returns:	0			Success
706  *		EBADF			Bad file descriptor
707  *		EALREADY		Connection already in progress
708  *		EINPROGRESS		Operation in progress
709  *		ECONNABORTED		Connection aborted
710  *		EINTR			Interrupted function
711  *		EACCES			Mandatory Access Control failure
712  *	file_socket:ENOTSOCK
713  *	file_socket:EBADF
714  *	getsockaddr:ENAMETOOLONG	Filename too long
715  *	getsockaddr:EINVAL		Invalid argument
716  *	getsockaddr:ENOMEM		Not enough space
717  *	getsockaddr:EFAULT		Bad address
718  *	soconnectlock:EOPNOTSUPP
719  *	soconnectlock:EISCONN
720  *	soconnectlock:???		[depends on protocol, filters]
721  *	msleep:EINTR
722  *
723  * Imputed:	so_error		error may be set from so_error, which
724  *					may have been set by soconnectlock.
725  */
726 /* ARGSUSED */
727 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)728 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
729 {
730 	__pthread_testcancel(1);
731 	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
732 	           retval);
733 }
734 
735 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)736 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
737 {
738 #pragma unused(p, retval)
739 	socket_ref_t so;
740 	struct sockaddr_storage ss;
741 	sockaddr_ref_t  sa = NULL;
742 	int error;
743 	int fd = uap->s;
744 	boolean_t dgram;
745 
746 	AUDIT_ARG(fd, uap->s);
747 	error = file_socket(fd, &so);
748 	if (error != 0) {
749 		return error;
750 	}
751 	if (so == NULL) {
752 		error = EBADF;
753 		goto out;
754 	}
755 
756 	/*
757 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
758 	 * if this is a datagram socket; translate for other types.
759 	 */
760 	dgram = (so->so_type == SOCK_DGRAM);
761 
762 	/* Get socket address now before we obtain socket lock */
763 	if (uap->namelen > sizeof(ss)) {
764 		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
765 	} else {
766 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
767 		if (error == 0) {
768 			sa = (sockaddr_ref_t)&ss;
769 		}
770 	}
771 	if (error != 0) {
772 		goto out;
773 	}
774 
775 	error = connectit(so, sa);
776 
777 	if (sa != NULL && sa != SA(&ss)) {
778 		free_sockaddr(sa);
779 	}
780 	if (error == ERESTART) {
781 		error = EINTR;
782 	}
783 out:
784 	file_drop(fd);
785 	return error;
786 }
787 
788 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)789 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
790 {
791 #pragma unused(p, retval)
792 	struct sockaddr_storage ss, sd;
793 	sockaddr_ref_t  src = NULL, dst = NULL;
794 	socket_ref_t so;
795 	int error, error1, fd = uap->socket;
796 	boolean_t dgram;
797 	sae_connid_t cid = SAE_CONNID_ANY;
798 	struct user32_sa_endpoints ep32;
799 	struct user64_sa_endpoints ep64;
800 	struct user_sa_endpoints ep;
801 	user_ssize_t bytes_written = 0;
802 	struct user_iovec *iovp;
803 	uio_t auio = NULL;
804 
805 	AUDIT_ARG(fd, uap->socket);
806 	error = file_socket(fd, &so);
807 	if (error != 0) {
808 		return error;
809 	}
810 	if (so == NULL) {
811 		error = EBADF;
812 		goto out;
813 	}
814 
815 	if (uap->endpoints == USER_ADDR_NULL) {
816 		error = EINVAL;
817 		goto out;
818 	}
819 
820 	if (IS_64BIT_PROCESS(p)) {
821 		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
822 		if (error != 0) {
823 			goto out;
824 		}
825 
826 		ep.sae_srcif = ep64.sae_srcif;
827 		ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
828 		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
829 		ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
830 		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
831 	} else {
832 		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
833 		if (error != 0) {
834 			goto out;
835 		}
836 
837 		ep.sae_srcif = ep32.sae_srcif;
838 		ep.sae_srcaddr = ep32.sae_srcaddr;
839 		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
840 		ep.sae_dstaddr = ep32.sae_dstaddr;
841 		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
842 	}
843 
844 	/*
845 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
846 	 * if this is a datagram socket; translate for other types.
847 	 */
848 	dgram = (so->so_type == SOCK_DGRAM);
849 
850 	/* Get socket address now before we obtain socket lock */
851 	if (ep.sae_srcaddr != USER_ADDR_NULL) {
852 		if (ep.sae_srcaddrlen > sizeof(ss)) {
853 			error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
854 		} else {
855 			error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
856 			if (error == 0) {
857 				src = (sockaddr_ref_t)&ss;
858 			}
859 		}
860 
861 		if (error) {
862 			goto out;
863 		}
864 	}
865 
866 	if (ep.sae_dstaddr == USER_ADDR_NULL) {
867 		error = EINVAL;
868 		goto out;
869 	}
870 
871 	/* Get socket address now before we obtain socket lock */
872 	if (ep.sae_dstaddrlen > sizeof(sd)) {
873 		error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
874 	} else {
875 		error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
876 		if (error == 0) {
877 			dst = (sockaddr_ref_t)&sd;
878 		}
879 	}
880 
881 	if (error) {
882 		goto out;
883 	}
884 
885 	VERIFY(dst != NULL);
886 
887 	if (uap->iov != USER_ADDR_NULL) {
888 		/* Verify range before calling uio_create() */
889 		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
890 			error = EINVAL;
891 			goto out;
892 		}
893 
894 		if (uap->len == USER_ADDR_NULL) {
895 			error = EINVAL;
896 			goto out;
897 		}
898 
899 		/* allocate a uio to hold the number of iovecs passed */
900 		auio = uio_create(uap->iovcnt, 0,
901 		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
902 		    UIO_WRITE);
903 
904 		if (auio == NULL) {
905 			error = ENOMEM;
906 			goto out;
907 		}
908 
909 		/*
910 		 * get location of iovecs within the uio.
911 		 * then copyin the iovecs from user space.
912 		 */
913 		iovp = uio_iovsaddr(auio);
914 		if (iovp == NULL) {
915 			error = ENOMEM;
916 			goto out;
917 		}
918 		error = copyin_user_iovec_array(uap->iov,
919 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
920 		    uap->iovcnt, iovp);
921 		if (error != 0) {
922 			goto out;
923 		}
924 
925 		/* finish setup of uio_t */
926 		error = uio_calculateresid(auio);
927 		if (error != 0) {
928 			goto out;
929 		}
930 	}
931 
932 	error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
933 	    &cid, auio, uap->flags, &bytes_written);
934 	if (error == ERESTART) {
935 		error = EINTR;
936 	}
937 
938 	if (uap->len != USER_ADDR_NULL) {
939 		if (IS_64BIT_PROCESS(p)) {
940 			error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
941 		} else {
942 			error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
943 		}
944 		/* give precedence to connectitx errors */
945 		if ((error1 != 0) && (error == 0)) {
946 			error = error1;
947 		}
948 	}
949 
950 	if (uap->connid != USER_ADDR_NULL) {
951 		error1 = copyout(&cid, uap->connid, sizeof(cid));
952 		/* give precedence to connectitx errors */
953 		if ((error1 != 0) && (error == 0)) {
954 			error = error1;
955 		}
956 	}
957 out:
958 	file_drop(fd);
959 	if (auio != NULL) {
960 		uio_free(auio);
961 	}
962 	if (src != NULL && src != SA(&ss)) {
963 		free_sockaddr(src);
964 	}
965 	if (dst != NULL && dst != SA(&sd)) {
966 		free_sockaddr(dst);
967 	}
968 	return error;
969 }
970 
971 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)972 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
973 {
974 	/*
975 	 * Due to similiarity with a POSIX interface, define as
976 	 * an unofficial cancellation point.
977 	 */
978 	__pthread_testcancel(1);
979 	return connectx_nocancel(p, uap, retval);
980 }
981 
982 static int
connectit(struct socket * so,sockaddr_ref_t sa)983 connectit(struct socket *so, sockaddr_ref_t sa)
984 {
985 	int error;
986 
987 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
988 #if CONFIG_MACF_SOCKET_SUBSET
989 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
990 		return error;
991 	}
992 #endif /* MAC_SOCKET_SUBSET */
993 
994 	socket_lock(so, 1);
995 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
996 		error = EALREADY;
997 		goto out;
998 	}
999 	error = soconnectlock(so, sa, 0);
1000 	if (error != 0) {
1001 		goto out;
1002 	}
1003 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1004 		error = EINPROGRESS;
1005 		goto out;
1006 	}
1007 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1008 		lck_mtx_t *mutex_held;
1009 
1010 		if (so->so_proto->pr_getlock != NULL) {
1011 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1012 		} else {
1013 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1014 		}
1015 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1016 		    PSOCK | PCATCH, __func__, 0);
1017 		if (so->so_state & SS_DRAINING) {
1018 			error = ECONNABORTED;
1019 		}
1020 		if (error != 0) {
1021 			break;
1022 		}
1023 	}
1024 	if (error == 0) {
1025 		error = so->so_error;
1026 		so->so_error = 0;
1027 	}
1028 out:
1029 	socket_unlock(so, 1);
1030 	return error;
1031 }
1032 
1033 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1034 connectitx(struct socket *so, sockaddr_ref_t src,
1035     sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1036     sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1037     user_ssize_t *bytes_written)
1038 {
1039 	int error;
1040 
1041 	VERIFY(dst != NULL);
1042 
1043 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1044 #if CONFIG_MACF_SOCKET_SUBSET
1045 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1046 		return error;
1047 	}
1048 
1049 	if (auio != NULL) {
1050 		if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1051 			return error;
1052 		}
1053 	}
1054 #endif /* MAC_SOCKET_SUBSET */
1055 
1056 	socket_lock(so, 1);
1057 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1058 		error = EALREADY;
1059 		goto out;
1060 	}
1061 
1062 	error = soconnectxlocked(so, src, dst, p, ifscope,
1063 	    aid, pcid, flags, NULL, 0, auio, bytes_written);
1064 	if (error != 0) {
1065 		goto out;
1066 	}
1067 	/*
1068 	 * If, after the call to soconnectxlocked the flag is still set (in case
1069 	 * data has been queued and the connect() has actually been triggered,
1070 	 * it will have been unset by the transport), we exit immediately. There
1071 	 * is no reason to wait on any event.
1072 	 */
1073 	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1074 		error = 0;
1075 		goto out;
1076 	}
1077 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1078 		error = EINPROGRESS;
1079 		goto out;
1080 	}
1081 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1082 		lck_mtx_t *mutex_held;
1083 
1084 		if (so->so_proto->pr_getlock != NULL) {
1085 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1086 		} else {
1087 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1088 		}
1089 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1090 		    PSOCK | PCATCH, __func__, 0);
1091 		if (so->so_state & SS_DRAINING) {
1092 			error = ECONNABORTED;
1093 		}
1094 		if (error != 0) {
1095 			break;
1096 		}
1097 	}
1098 	if (error == 0) {
1099 		error = so->so_error;
1100 		so->so_error = 0;
1101 	}
1102 out:
1103 	socket_unlock(so, 1);
1104 	return error;
1105 }
1106 
int
peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	/*
	 * Stub: none of the arguments are used and the call always
	 * returns 0.
	 *
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);
	return 0;
}
1118 
1119 int
disconnectx(proc_ref_t p,struct disconnectx_args * uap,int * retval)1120 disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1121 {
1122 	/*
1123 	 * Due to similiarity with a POSIX interface, define as
1124 	 * an unofficial cancellation point.
1125 	 */
1126 	__pthread_testcancel(1);
1127 	return disconnectx_nocancel(p, uap, retval);
1128 }
1129 
1130 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1131 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1132 {
1133 #pragma unused(p, retval)
1134 	socket_ref_t so;
1135 	int fd = uap->s;
1136 	int error;
1137 
1138 	error = file_socket(fd, &so);
1139 	if (error != 0) {
1140 		return error;
1141 	}
1142 	if (so == NULL) {
1143 		error = EBADF;
1144 		goto out;
1145 	}
1146 
1147 	error = sodisconnectx(so, uap->aid, uap->cid);
1148 out:
1149 	file_drop(fd);
1150 	return error;
1151 }
1152 
1153 /*
1154  * Returns:	0			Success
1155  *	socreate:EAFNOSUPPORT
1156  *	socreate:EPROTOTYPE
1157  *	socreate:EPROTONOSUPPORT
1158  *	socreate:ENOBUFS
1159  *	socreate:ENOMEM
1160  *	socreate:EISCONN
1161  *	socreate:???			[other protocol families, IPSEC]
1162  *	falloc:ENFILE
1163  *	falloc:EMFILE
1164  *	falloc:ENOMEM
1165  *	copyout:EFAULT
1166  *	soconnect2:EINVAL
1167  *	soconnect2:EPROTOTYPE
 *	soconnect2:???			[other protocol families]
1169  */
int
socketpair(proc_ref_t p, struct socketpair_args *uap,
    __unused int32_ref_t retval)
{
	/*
	 * Create two sockets of the requested domain/type/protocol, give
	 * each one a file descriptor, connect them to each other and copy
	 * the two descriptor numbers out to uap->rsv.  On any failure the
	 * steps completed so far are undone in reverse order through the
	 * free4..free1 labels.
	 */
	fileproc_ref_t  fp1, fp2;
	socket_ref_t so1, so2;
	int fd, error, sv[2];

	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
	if (error) {
		return error;
	}
	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
	if (error) {
		goto free1;
	}

	/* First descriptor: read/write socket file backed by so1. */
	error = falloc(p, &fp1, &fd, vfs_context_current());
	if (error) {
		goto free2;
	}
	fp1->f_flag = FREAD | FWRITE;
	fp1->f_ops = &socketops;
	fp_set_data(fp1, so1);
	sv[0] = fd;

	/* Second descriptor: read/write socket file backed by so2. */
	error = falloc(p, &fp2, &fd, vfs_context_current());
	if (error) {
		goto free3;
	}
	fp2->f_flag = FREAD | FWRITE;
	fp2->f_ops = &socketops;
	fp_set_data(fp2, so2);
	sv[1] = fd;

	error = soconnect2(so1, so2);
	if (error) {
		goto free4;
	}
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		error = soconnect2(so2, so1);
		if (error) {
			goto free4;
		}
	}

	/* Hand both descriptor numbers back to the caller's int[2]. */
	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
		goto free4;
	}

	/*
	 * Success: under the proc fd lock, release both descriptor slots
	 * and drop the fileproc references taken by falloc().
	 * NOTE(review): presumably procfdtbl_releasefd() is what makes the
	 * descriptors visible to the process -- confirm against its
	 * definition.
	 */
	proc_fdlock(p);
	procfdtbl_releasefd(p, sv[0], NULL);
	procfdtbl_releasefd(p, sv[1], NULL);
	fp_drop(p, sv[0], fp1, 1);
	fp_drop(p, sv[1], fp2, 1);
	proc_fdunlock(p);

	return 0;
	/* Error unwinding: each label undoes one setup step, newest first. */
free4:
	fp_free(p, sv[1], fp2);
free3:
	fp_free(p, sv[0], fp1);
free2:
	(void) soclose(so2);
free1:
	(void) soclose(so1);
	return error;
}
1242 
1243 /*
1244  * Returns:	0			Success
1245  *		EINVAL
1246  *		ENOBUFS
1247  *		EBADF
1248  *		EPIPE
1249  *		EACCES			Mandatory Access Control failure
1250  *	file_socket:ENOTSOCK
1251  *	file_socket:EBADF
1252  *	getsockaddr:ENAMETOOLONG	Filename too long
1253  *	getsockaddr:EINVAL		Invalid argument
1254  *	getsockaddr:ENOMEM		Not enough space
1255  *	getsockaddr:EFAULT		Bad address
1256  *	<pru_sosend>:EACCES[TCP]
1257  *	<pru_sosend>:EADDRINUSE[TCP]
1258  *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1259  *	<pru_sosend>:EAFNOSUPPORT[TCP]
1260  *	<pru_sosend>:EAGAIN[TCP]
1261  *	<pru_sosend>:EBADF
1262  *	<pru_sosend>:ECONNRESET[TCP]
1263  *	<pru_sosend>:EFAULT
1264  *	<pru_sosend>:EHOSTUNREACH[TCP]
1265  *	<pru_sosend>:EINTR
1266  *	<pru_sosend>:EINVAL
1267  *	<pru_sosend>:EISCONN[AF_INET]
1268  *	<pru_sosend>:EMSGSIZE[TCP]
1269  *	<pru_sosend>:ENETDOWN[TCP]
1270  *	<pru_sosend>:ENETUNREACH[TCP]
1271  *	<pru_sosend>:ENOBUFS
1272  *	<pru_sosend>:ENOMEM[TCP]
1273  *	<pru_sosend>:ENOTCONN[AF_INET]
1274  *	<pru_sosend>:EOPNOTSUPP
1275  *	<pru_sosend>:EPERM[TCP]
1276  *	<pru_sosend>:EPIPE
1277  *	<pru_sosend>:EWOULDBLOCK
1278  *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1279  *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1280  *	<pru_sosend>:???		[value from so_error]
1281  *	sockargs:???
1282  */
/*
 * Common send path shared by the send system calls in this file.
 *
 * p		calling process (target of SIGPIPE)
 * so		socket to send on
 * mp		message header; only msg_name/msg_namelen and
 *		msg_control/msg_controllen are read here
 * uiop		uio describing the payload
 * flags	MSG_* flags passed through to pru_sosend
 * retval	on success receives the number of bytes consumed from uiop
 */
static int
sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
    int flags, int32_ref_t retval)
{
	mbuf_ref_t  control = NULL;
	struct sockaddr_storage ss;
	sockaddr_ref_t  to = NULL;
	/* TRUE while `to' must be released with free_sockaddr() at `bad'. */
	boolean_t want_free = TRUE;
	int error;
	user_ssize_t len;

	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (mp->msg_name != USER_ADDR_NULL) {
		/*
		 * Addresses that fit are copied into the on-stack `ss';
		 * larger ones go through getsockaddr(), whose result is
		 * released with free_sockaddr() below.
		 */
		if (mp->msg_namelen > sizeof(ss)) {
			error = getsockaddr(so, &to, mp->msg_name,
			    mp->msg_namelen, TRUE);
		} else {
			error = getsockaddr_s(so, &ss, mp->msg_name,
			    mp->msg_namelen, TRUE);
			if (error == 0) {
				to = (sockaddr_ref_t)&ss;
				want_free = FALSE;
			}
		}
		if (error != 0) {
			/* Nothing has been acquired yet: skip the cleanup at `bad'. */
			goto out;
		}
		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
	}
	if (mp->msg_control != USER_ADDR_NULL) {
		/* Control data must hold at least one cmsghdr. */
		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
			error = EINVAL;
			goto bad;
		}
		error = sockargs(&control, mp->msg_control,
		    mp->msg_controllen, MT_CONTROL);
		if (error != 0) {
			goto bad;
		}
	}

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * We check the state without holding the socket lock;
	 * if a race condition occurs, it would simply result
	 * in an extra call to the MAC check function.
	 */
	if (to != NULL &&
	    !(so->so_state & SS_DEFUNCT) &&
	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
		/* `control' was never handed to pru_sosend: free it here. */
		if (control != NULL) {
			m_freem(control);
		}

		goto bad;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	/* Remember the starting resid so progress can be measured afterwards. */
	len = uio_resid(uiop);
	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
	    control, flags);
	if (error != 0) {
		/*
		 * An interrupted or would-block send that already moved
		 * some data is reported as a (short) success.
		 */
		if (uio_resid(uiop) != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK)) {
			error = 0;
		}
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
		    !(flags & MSG_NOSIGNAL)) {
			psignal(p, SIGPIPE);
		}
	}
	if (error == 0) {
		*retval = (int)(len - uio_resid(uiop));
	}
bad:
	if (want_free) {
		free_sockaddr(to);
	}
out:
	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);

	return error;
}
1368 
1369 /*
1370  * Returns:	0			Success
1371  *		ENOMEM
1372  *	sendit:???			[see sendit definition in this file]
1373  *	write:???			[4056224: applicable for pipes]
1374  */
1375 int
sendto(proc_ref_t p,struct sendto_args * uap,int32_ref_t retval)1376 sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1377 {
1378 	__pthread_testcancel(1);
1379 	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1380 }
1381 
1382 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1383 sendto_nocancel(proc_ref_t p,
1384     struct sendto_nocancel_args *uap,
1385     int32_ref_t retval)
1386 {
1387 	struct user_msghdr msg;
1388 	int error;
1389 	uio_t auio = NULL;
1390 	socket_ref_t so;
1391 
1392 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1393 	AUDIT_ARG(fd, uap->s);
1394 
1395 	if (uap->flags & MSG_SKIPCFIL) {
1396 		error = EPERM;
1397 		goto done;
1398 	}
1399 
1400 	if (uap->len > LONG_MAX) {
1401 		error = EINVAL;
1402 		goto done;
1403 	}
1404 
1405 	auio = uio_create(1, 0,
1406 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1407 	    UIO_WRITE);
1408 	if (auio == NULL) {
1409 		error = ENOMEM;
1410 		goto done;
1411 	}
1412 	uio_addiov(auio, uap->buf, uap->len);
1413 
1414 	msg.msg_name = uap->to;
1415 	msg.msg_namelen = uap->tolen;
1416 	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1417 	msg.msg_iov = 0;
1418 	msg.msg_iovlen = 0;
1419 	msg.msg_control = 0;
1420 	msg.msg_flags = 0;
1421 
1422 	error = file_socket(uap->s, &so);
1423 	if (error) {
1424 		goto done;
1425 	}
1426 
1427 	if (so == NULL) {
1428 		error = EBADF;
1429 	} else {
1430 		error = sendit(p, so, &msg, auio, uap->flags, retval);
1431 	}
1432 
1433 	file_drop(uap->s);
1434 done:
1435 	if (auio != NULL) {
1436 		uio_free(auio);
1437 	}
1438 
1439 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1440 
1441 	return error;
1442 }
1443 
1444 /*
1445  * Returns:	0			Success
1446  *		ENOBUFS
1447  *	copyin:EFAULT
1448  *	sendit:???			[see sendit definition in this file]
1449  */
1450 int
sendmsg(proc_ref_t p,struct sendmsg_args * uap,int32_ref_t retval)1451 sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1452 {
1453 	__pthread_testcancel(1);
1454 	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1455 	           retval);
1456 }
1457 
/*
 * sendmsg() without the cancellation check: copy the 32- or 64-bit
 * msghdr in from uap->msg, widen it into a struct user_msghdr, copy in
 * the iovec array it points to and pass the result to sendit().
 */
int
sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
    int32_ref_t retval)
{
	struct user32_msghdr msg32;
	struct user64_msghdr msg64;
	struct user_msghdr user_msg;
	caddr_t msghdrp;
	int     size_of_msghdr;
	int error;
	uio_t auio = NULL;
	struct user_iovec *iovp;
	socket_ref_t so;

	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);

	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
	AUDIT_ARG(fd, uap->s);

	/* MSG_SKIPCFIL is not accepted from this entry point. */
	if (uap->flags & MSG_SKIPCFIL) {
		error = EPERM;
		goto done;
	}

	/* Pick the msghdr layout matching the caller's address-space width. */
	if (is_p_64bit_process) {
		msghdrp = (caddr_t)&msg64;
		size_of_msghdr = sizeof(msg64);
	} else {
		msghdrp = (caddr_t)&msg32;
		size_of_msghdr = sizeof(msg32);
	}
	error = copyin(uap->msg, msghdrp, size_of_msghdr);
	if (error) {
		/* No uio has been created yet, so return directly. */
		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
		return error;
	}

	/* Widen the user-layout header into the kernel's user_msghdr. */
	if (is_p_64bit_process) {
		user_msg.msg_flags = msg64.msg_flags;
		user_msg.msg_controllen = msg64.msg_controllen;
		user_msg.msg_control = (user_addr_t)msg64.msg_control;
		user_msg.msg_iovlen = msg64.msg_iovlen;
		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
		user_msg.msg_namelen = msg64.msg_namelen;
		user_msg.msg_name = (user_addr_t)msg64.msg_name;
	} else {
		user_msg.msg_flags = msg32.msg_flags;
		user_msg.msg_controllen = msg32.msg_controllen;
		user_msg.msg_control = msg32.msg_control;
		user_msg.msg_iovlen = msg32.msg_iovlen;
		user_msg.msg_iov = msg32.msg_iov;
		user_msg.msg_namelen = msg32.msg_namelen;
		user_msg.msg_name = msg32.msg_name;
	}

	/* The iovec count must lie in 1..UIO_MAXIOV. */
	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
		    0, 0, 0, 0);
		return EMSGSIZE;
	}

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(user_msg.msg_iovlen, 0,
	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
	    UIO_WRITE);
	if (auio == NULL) {
		error = ENOBUFS;
		goto done;
	}

	/*
	 * NOTE(review): msg_iovlen was range-checked above, so the else
	 * branch below looks unreachable.
	 */
	if (user_msg.msg_iovlen) {
		/*
		 * get location of iovecs within the uio.
		 * then copyin the iovecs from user space.
		 */
		iovp = uio_iovsaddr(auio);
		if (iovp == NULL) {
			error = ENOBUFS;
			goto done;
		}
		error = copyin_user_iovec_array(user_msg.msg_iov,
		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
		    user_msg.msg_iovlen, iovp);
		if (error) {
			goto done;
		}
		/* msg_iov now refers to the kernel copy inside the uio. */
		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);

		/* finish setup of uio_t */
		error = uio_calculateresid(auio);
		if (error) {
			goto done;
		}
	} else {
		user_msg.msg_iov = 0;
	}

	/* msg_flags is ignored for send */
	user_msg.msg_flags = 0;

	error = file_socket(uap->s, &so);
	if (error) {
		goto done;
	}
	if (so == NULL) {
		error = EBADF;
	} else {
		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
	}
	/* Pairs with the successful file_socket() above. */
	file_drop(uap->s);
done:
	if (auio != NULL) {
		uio_free(auio);
	}
	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);

	return error;
}
1576 
1577 int
sendmsg_x(proc_ref_t p,struct sendmsg_x_args * uap,user_ssize_t * retval)1578 sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1579 {
1580 	int error = 0;
1581 	user_msghdr_x_ptr_t user_msg_x = NULL;
1582 	uio_ref_ptr_t uiop = NULL;
1583 	socket_ref_t so;
1584 	u_int i;
1585 	sockaddr_ref_t to = NULL;
1586 	user_ssize_t len_before = 0, len_after;
1587 	int need_drop = 0;
1588 	size_t size_of_msghdr;
1589 	void_ptr_t umsgp = NULL;
1590 	u_int uiocnt = 0;
1591 	int has_addr_or_ctl = 0;
1592 
1593 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1594 
1595 	size_of_msghdr = IS_64BIT_PROCESS(p) ?
1596 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1597 
1598 	if (uap->flags & MSG_SKIPCFIL) {
1599 		error = EPERM;
1600 		goto out;
1601 	}
1602 
1603 	error = file_socket(uap->s, &so);
1604 	if (error) {
1605 		goto out;
1606 	}
1607 	need_drop = 1;
1608 	if (so == NULL) {
1609 		error = EBADF;
1610 		goto out;
1611 	}
1612 
1613 	/*
1614 	 * Input parameter range check
1615 	 */
1616 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1617 		error = EINVAL;
1618 		goto out;
1619 	}
1620 	/*
1621 	 * Clip to max currently allowed
1622 	 */
1623 	if (uap->cnt > somaxsendmsgx) {
1624 		uap->cnt = somaxsendmsgx;
1625 	}
1626 
1627 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
1628 	    Z_WAITOK | Z_ZERO);
1629 	if (user_msg_x == NULL) {
1630 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
1631 		error = ENOMEM;
1632 		goto out;
1633 	}
1634 	uiop = kalloc_type(uio_ref_t, uap->cnt, Z_WAITOK | Z_ZERO);
1635 	if (uiop == NULL) {
1636 		DBG_PRINTF("%s uiop alloc failed\n", __func__);
1637 		error = ENOMEM;
1638 		goto out;
1639 	}
1640 
1641 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
1642 	if (umsgp == NULL) {
1643 		printf("%s user_msg_x alloc failed\n", __func__);
1644 		error = ENOMEM;
1645 		goto out;
1646 	}
1647 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1648 	if (error) {
1649 		DBG_PRINTF("%s copyin() failed\n", __func__);
1650 		goto out;
1651 	}
1652 	error = internalize_user_msghdr_array(umsgp,
1653 	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1654 	    UIO_WRITE, uap->cnt, user_msg_x, uiop);
1655 	if (error) {
1656 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1657 		goto out;
1658 	}
1659 	/*
1660 	 * Make sure the size of each message iovec and
1661 	 * the aggregate size of all the iovec is valid
1662 	 */
1663 	if (uio_array_is_valid(uiop, uap->cnt) == false) {
1664 		error = EINVAL;
1665 		goto out;
1666 	}
1667 
1668 	/*
1669 	 * Sanity check on passed arguments
1670 	 */
1671 	for (i = 0; i < uap->cnt; i++) {
1672 		struct user_msghdr_x *mp = user_msg_x + i;
1673 
1674 		/*
1675 		 * No flags on send message
1676 		 */
1677 		if (mp->msg_flags != 0) {
1678 			error = EINVAL;
1679 			goto out;
1680 		}
1681 		/*
1682 		 * No support for address or ancillary data (yet)
1683 		 */
1684 		if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1685 			has_addr_or_ctl = 1;
1686 		}
1687 
1688 		if (mp->msg_control != USER_ADDR_NULL ||
1689 		    mp->msg_controllen != 0) {
1690 			has_addr_or_ctl = 1;
1691 		}
1692 
1693 #if CONFIG_MACF_SOCKET_SUBSET
1694 		/*
1695 		 * We check the state without holding the socket lock;
1696 		 * if a race condition occurs, it would simply result
1697 		 * in an extra call to the MAC check function.
1698 		 *
1699 		 * Note: The following check is never true taken with the
1700 		 * current limitation that we do not accept to pass an address,
1701 		 * this is effectively placeholder code. If we add support for
1702 		 * addresses, we will have to check every address.
1703 		 */
1704 		if (to != NULL &&
1705 		    !(so->so_state & SS_DEFUNCT) &&
1706 		    (error = mac_socket_check_send(kauth_cred_get(), so, to))
1707 		    != 0) {
1708 			goto out;
1709 		}
1710 #endif /* MAC_SOCKET_SUBSET */
1711 	}
1712 
1713 	len_before = uio_array_resid(uiop, uap->cnt);
1714 
1715 	/*
1716 	 * Feed list of packets at once only for connected socket without
1717 	 * control message
1718 	 */
1719 	if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1720 	    pru_sosend_list_notsupp &&
1721 	    has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1722 		error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1723 		    uap->cnt, uap->flags);
1724 	} else {
1725 		for (i = 0; i < uap->cnt; i++) {
1726 			struct user_msghdr_x *mp = user_msg_x + i;
1727 			struct user_msghdr user_msg;
1728 			uio_t auio = uiop[i];
1729 			int32_t tmpval;
1730 
1731 			user_msg.msg_flags = mp->msg_flags;
1732 			user_msg.msg_controllen = mp->msg_controllen;
1733 			user_msg.msg_control = mp->msg_control;
1734 			user_msg.msg_iovlen = mp->msg_iovlen;
1735 			user_msg.msg_iov = mp->msg_iov;
1736 			user_msg.msg_namelen = mp->msg_namelen;
1737 			user_msg.msg_name = mp->msg_name;
1738 
1739 			error = sendit(p, so, &user_msg, auio, uap->flags,
1740 			    &tmpval);
1741 			if (error != 0) {
1742 				break;
1743 			}
1744 			uiocnt += 1;
1745 		}
1746 	}
1747 	len_after = uio_array_resid(uiop, uap->cnt);
1748 
1749 	VERIFY(len_after <= len_before);
1750 
1751 	if (error != 0) {
1752 		if (len_after != len_before && (error == ERESTART ||
1753 		    error == EINTR || error == EWOULDBLOCK ||
1754 		    error == ENOBUFS)) {
1755 			error = 0;
1756 		}
1757 		/* Generation of SIGPIPE can be controlled per socket */
1758 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1759 		    !(uap->flags & MSG_NOSIGNAL)) {
1760 			psignal(p, SIGPIPE);
1761 		}
1762 	}
1763 	if (error == 0) {
1764 		externalize_user_msghdr_array(umsgp,
1765 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1766 		    UIO_WRITE, uiocnt, user_msg_x, uiop);
1767 
1768 		*retval = (int)(uiocnt);
1769 	}
1770 out:
1771 	if (need_drop) {
1772 		file_drop(uap->s);
1773 	}
1774 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
1775 	if (uiop != NULL) {
1776 		free_uio_array(uiop, uap->cnt);
1777 		kfree_type(uio_ref_t, uap->cnt, uiop);
1778 	}
1779 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
1780 
1781 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1782 
1783 	return error;
1784 }
1785 
1786 
1787 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)1788 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
1789 {
1790 	int error = 0;
1791 	socklen_t sa_len = 0;
1792 	ssize_t len;
1793 
1794 	len = *namelen;
1795 	if (len <= 0 || fromsa == 0) {
1796 		len = 0;
1797 	} else {
1798 #ifndef MIN
1799 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1800 #endif
1801 		sa_len = fromsa->sa_len;
1802 		len = MIN((unsigned int)len, sa_len);
1803 		error = copyout(fromsa, name, (unsigned)len);
1804 		if (error) {
1805 			goto out;
1806 		}
1807 	}
1808 	*namelen = sa_len;
1809 out:
1810 	return 0;
1811 }
1812 
1813 static int
copyout_control(proc_ref_t p,mbuf_ref_t m,user_addr_t control,socklen_ref_t controllen,int_ref_t flags,socket_ref_t so)1814 copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
1815     socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
1816 {
1817 	int error = 0;
1818 	socklen_t len;
1819 	user_addr_t ctlbuf;
1820 	struct inpcb *inp = NULL;
1821 	bool want_pktinfo = false;
1822 	bool seen_pktinfo = false;
1823 
1824 	if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
1825 		inp = sotoinpcb(so);
1826 		want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
1827 	}
1828 
1829 	len = *controllen;
1830 	*controllen = 0;
1831 	ctlbuf = control;
1832 
1833 	while (m && len > 0) {
1834 		socklen_t tocopy;
1835 		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1836 		socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
1837 		socklen_t buflen = m->m_len;
1838 
1839 		while (buflen > 0 && len > 0) {
1840 			/*
1841 			 * SCM_TIMESTAMP hack because  struct timeval has a
1842 			 * different size for 32 bits and 64 bits processes
1843 			 */
1844 			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1845 				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1846 				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1847 				socklen_t tmp_space;
1848 				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1849 
1850 				tmp_cp->cmsg_level = SOL_SOCKET;
1851 				tmp_cp->cmsg_type = SCM_TIMESTAMP;
1852 
1853 				if (proc_is64bit(p)) {
1854 					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1855 
1856 					os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1857 					os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1858 
1859 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1860 					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1861 				} else {
1862 					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1863 
1864 					tv32->tv_sec = (user32_time_t)tv->tv_sec;
1865 					tv32->tv_usec = tv->tv_usec;
1866 
1867 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1868 					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1869 				}
1870 				if (len >= tmp_space) {
1871 					tocopy = tmp_space;
1872 				} else {
1873 					*flags |= MSG_CTRUNC;
1874 					tocopy = len;
1875 				}
1876 				error = copyout(tmp_buffer, ctlbuf, tocopy);
1877 				if (error) {
1878 					goto out;
1879 				}
1880 			} else {
1881 				/* If socket has flow tracking and socket did not request address, ignore it */
1882 				if (SOFLOW_ENABLED(so) &&
1883 				    ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
1884 				    !(inp->inp_flags & INP_RECVDSTADDR)) ||
1885 				    (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
1886 				    !(inp->inp_flags & IN6P_PKTINFO)))) {
1887 					tocopy = 0;
1888 				} else {
1889 					if (cp_size > buflen) {
1890 						panic("cp_size > buflen, something"
1891 						    "wrong with alignment!");
1892 					}
1893 					if (len >= cp_size) {
1894 						tocopy = cp_size;
1895 					} else {
1896 						*flags |= MSG_CTRUNC;
1897 						tocopy = len;
1898 					}
1899 					error = copyout((caddr_t) cp, ctlbuf, tocopy);
1900 					if (error) {
1901 						goto out;
1902 					}
1903 					if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
1904 					    (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
1905 						seen_pktinfo = true;
1906 					}
1907 				}
1908 			}
1909 
1910 			ctlbuf += tocopy;
1911 			len -= tocopy;
1912 
1913 			buflen -= cp_size;
1914 			cp = (struct cmsghdr *)(void *)
1915 			    ((unsigned char *) cp + cp_size);
1916 			cp_size = CMSG_ALIGN(cp->cmsg_len);
1917 		}
1918 
1919 		m = m->m_next;
1920 	}
1921 	*controllen = (socklen_t)(ctlbuf - control);
1922 out:
1923 	if (want_pktinfo && !seen_pktinfo) {
1924 		missingpktinfo += 1;
1925 #if (DEBUG || DEVELOPMENT)
1926 		char pname[MAXCOMLEN];
1927 		char local[MAX_IPv6_STR_LEN + 6];
1928 		char remote[MAX_IPv6_STR_LEN + 6];
1929 
1930 		proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
1931 		if (inp->inp_vflag & INP_IPV6) {
1932 			inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
1933 			inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
1934 		} else {
1935 			inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
1936 			inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
1937 		}
1938 
1939 		os_log(OS_LOG_DEFAULT,
1940 		    "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
1941 		    local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
1942 		    pname, so->last_pid, error);
1943 #endif /* (DEBUG || DEVELOPMENT) */
1944 	}
1945 	return error;
1946 }
1947 
1948 /*
1949  * Returns:	0			Success
1950  *		ENOTSOCK
1951  *		EINVAL
1952  *		EBADF
1953  *		EACCES			Mandatory Access Control failure
1954  *	copyout:EFAULT
1955  *	fp_lookup:EBADF
1956  *	<pru_soreceive>:ENOBUFS
1957  *	<pru_soreceive>:ENOTCONN
1958  *	<pru_soreceive>:EWOULDBLOCK
1959  *	<pru_soreceive>:EFAULT
1960  *	<pru_soreceive>:EINTR
1961  *	<pru_soreceive>:EBADF
1962  *	<pru_soreceive>:EINVAL
1963  *	<pru_soreceive>:EMSGSIZE
1964  *	<pru_soreceive>:???
1965  *
1966  * Notes:	Additional return values from calls through <pru_soreceive>
1967  *		depend on protocols other than TCP or AF_UNIX, which are
1968  *		documented above.
1969  */
/*
 * Common receive path shared by the receive system calls in this file.
 *
 * p		calling process
 * s		socket descriptor
 * mp		message header; msg_name/msg_namelen, msg_control/
 *		msg_controllen and msg_flags are read and updated
 * uiop		uio describing where the payload goes
 * namelenp	optional user address receiving the untruncated address
 *		length
 * retval	on success receives the number of bytes received
 */
static int
recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
    user_addr_t namelenp, int32_ref_t retval)
{
	ssize_t len;
	int error;
	mbuf_ref_t  control = 0;
	socket_ref_t so;
	sockaddr_ref_t  fromsa = 0;
	fileproc_ref_t  fp;

	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
	/* Descriptors that are not sockets fail with ENOTSOCK. */
	if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
		return error;
	}
	so = (struct socket *)fp_get_data(fp);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * We check the state without holding the socket lock;
	 * if a race condition occurs, it would simply result
	 * in an extra call to the MAC check function.
	 */
	/* NOTE(review): this failure path emits no DBG_FUNC_END trace. */
	if (!(so->so_state & SS_DEFUNCT) &&
	    !(so->so_state & SS_ISCONNECTED) &&
	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
		goto out1;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */
	/* The requested size must fit the int32 result. */
	if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
		error = EINVAL;
		goto out1;
	}

	/* Remember the starting resid so progress can be measured afterwards. */
	len = uio_resid(uiop);
	/* Control data is only requested when the caller gave a buffer. */
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
	    NULL, mp->msg_control ? &control : NULL,
	    &mp->msg_flags);
	if (fromsa) {
		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
		    fromsa);
	}
	if (error) {
		/*
		 * An interrupted or would-block receive that already moved
		 * some data is reported as a (short) success.
		 */
		if (uio_resid(uiop) != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK)) {
			error = 0;
		}
	}
	if (error) {
		goto out;
	}

	*retval = (int32_t)(len - uio_resid(uiop));

	if (mp->msg_name) {
		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
		if (error) {
			goto out;
		}
		/* return the actual, untruncated address length */
		if (namelenp &&
		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
		    sizeof(int)))) {
			goto out;
		}
	}

	if (mp->msg_control) {
		error = copyout_control(p, control, mp->msg_control,
		    &mp->msg_controllen, &mp->msg_flags, so);
	}
out:
	/* Release what pru_soreceive handed back. */
	free_sockaddr(fromsa);
	if (control) {
		m_freem(control);
	}
	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
out1:
	/* Pairs with the successful fp_get_ftype() above. */
	fp_drop(p, s, fp, 0);
	return error;
}
2054 
2055 /*
2056  * Returns:	0			Success
2057  *		ENOMEM
2058  *	copyin:EFAULT
2059  *	recvit:???
2060  *	read:???			[4056224: applicable for pipes]
2061  *
2062  * Notes:	The read entry point is only called as part of support for
2063  *		binary backward compatability; new code should use read
2064  *		instead of recv or recvfrom when attempting to read data
2065  *		from pipes.
2066  *
2067  *		For full documentation of the return codes from recvit, see
2068  *		the block header for the recvit function.
2069  */
2070 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2071 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2072 {
2073 	__pthread_testcancel(1);
2074 	return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2075 	           retval);
2076 }
2077 
2078 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2079 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2080     int32_ref_t retval)
2081 {
2082 	struct user_msghdr msg;
2083 	int error;
2084 	uio_t auio = NULL;
2085 
2086 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2087 	AUDIT_ARG(fd, uap->s);
2088 
2089 	if (uap->fromlenaddr) {
2090 		error = copyin(uap->fromlenaddr,
2091 		    (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2092 		if (error) {
2093 			return error;
2094 		}
2095 	} else {
2096 		msg.msg_namelen = 0;
2097 	}
2098 	msg.msg_name = uap->from;
2099 	auio = uio_create(1, 0,
2100 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2101 	    UIO_READ);
2102 	if (auio == NULL) {
2103 		return ENOMEM;
2104 	}
2105 
2106 	uio_addiov(auio, uap->buf, uap->len);
2107 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
2108 	msg.msg_iov = 0;
2109 	msg.msg_iovlen = 0;
2110 	msg.msg_control = 0;
2111 	msg.msg_controllen = 0;
2112 	msg.msg_flags = uap->flags;
2113 	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2114 	if (auio != NULL) {
2115 		uio_free(auio);
2116 	}
2117 
2118 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2119 
2120 	return error;
2121 }
2122 
2123 /*
2124  * Returns:	0			Success
2125  *		EMSGSIZE
2126  *		ENOMEM
2127  *	copyin:EFAULT
2128  *	copyout:EFAULT
2129  *	recvit:???
2130  *
2131  * Notes:	For full documentation of the return codes from recvit, see
2132  *		the block header for the recvit function.
2133  */
2134 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2135 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2136 {
2137 	__pthread_testcancel(1);
2138 	return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2139 	           retval);
2140 }
2141 
2142 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2143 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2144     int32_ref_t retval)
2145 {
2146 	struct user32_msghdr msg32;
2147 	struct user64_msghdr msg64;
2148 	struct user_msghdr user_msg;
2149 	caddr_t msghdrp;
2150 	int     size_of_msghdr;
2151 	user_addr_t uiov;
2152 	int error;
2153 	uio_t auio = NULL;
2154 	struct user_iovec *iovp;
2155 
2156 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2157 
2158 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2159 	AUDIT_ARG(fd, uap->s);
2160 	if (is_p_64bit_process) {
2161 		msghdrp = (caddr_t)&msg64;
2162 		size_of_msghdr = sizeof(msg64);
2163 	} else {
2164 		msghdrp = (caddr_t)&msg32;
2165 		size_of_msghdr = sizeof(msg32);
2166 	}
2167 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
2168 	if (error) {
2169 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2170 		return error;
2171 	}
2172 
2173 	/* only need to copy if user process is not 64-bit */
2174 	if (is_p_64bit_process) {
2175 		user_msg.msg_flags = msg64.msg_flags;
2176 		user_msg.msg_controllen = msg64.msg_controllen;
2177 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
2178 		user_msg.msg_iovlen = msg64.msg_iovlen;
2179 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2180 		user_msg.msg_namelen = msg64.msg_namelen;
2181 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
2182 	} else {
2183 		user_msg.msg_flags = msg32.msg_flags;
2184 		user_msg.msg_controllen = msg32.msg_controllen;
2185 		user_msg.msg_control = msg32.msg_control;
2186 		user_msg.msg_iovlen = msg32.msg_iovlen;
2187 		user_msg.msg_iov = msg32.msg_iov;
2188 		user_msg.msg_namelen = msg32.msg_namelen;
2189 		user_msg.msg_name = msg32.msg_name;
2190 	}
2191 
2192 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2193 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2194 		    0, 0, 0, 0);
2195 		return EMSGSIZE;
2196 	}
2197 
2198 	user_msg.msg_flags = uap->flags;
2199 
2200 	/* allocate a uio large enough to hold the number of iovecs passed */
2201 	auio = uio_create(user_msg.msg_iovlen, 0,
2202 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2203 	    UIO_READ);
2204 	if (auio == NULL) {
2205 		error = ENOMEM;
2206 		goto done;
2207 	}
2208 
2209 	/*
2210 	 * get location of iovecs within the uio.  then copyin the iovecs from
2211 	 * user space.
2212 	 */
2213 	iovp = uio_iovsaddr(auio);
2214 	if (iovp == NULL) {
2215 		error = ENOMEM;
2216 		goto done;
2217 	}
2218 	uiov = user_msg.msg_iov;
2219 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2220 	error = copyin_user_iovec_array(uiov,
2221 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2222 	    user_msg.msg_iovlen, iovp);
2223 	if (error) {
2224 		goto done;
2225 	}
2226 
2227 	/* finish setup of uio_t */
2228 	error = uio_calculateresid(auio);
2229 	if (error) {
2230 		goto done;
2231 	}
2232 
2233 	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2234 	if (!error) {
2235 		user_msg.msg_iov = uiov;
2236 		if (is_p_64bit_process) {
2237 			msg64.msg_flags = user_msg.msg_flags;
2238 			msg64.msg_controllen = user_msg.msg_controllen;
2239 			msg64.msg_control = user_msg.msg_control;
2240 			msg64.msg_iovlen = user_msg.msg_iovlen;
2241 			msg64.msg_iov = user_msg.msg_iov;
2242 			msg64.msg_namelen = user_msg.msg_namelen;
2243 			msg64.msg_name = user_msg.msg_name;
2244 		} else {
2245 			msg32.msg_flags = user_msg.msg_flags;
2246 			msg32.msg_controllen = user_msg.msg_controllen;
2247 			msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2248 			msg32.msg_iovlen = user_msg.msg_iovlen;
2249 			msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2250 			msg32.msg_namelen = user_msg.msg_namelen;
2251 			msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2252 		}
2253 		error = copyout(msghdrp, uap->msg, size_of_msghdr);
2254 	}
2255 done:
2256 	if (auio != NULL) {
2257 		uio_free(auio);
2258 	}
2259 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2260 	return error;
2261 }
2262 
2263 int
recvmsg_x(proc_ref_t p,struct recvmsg_x_args * uap,user_ssize_t * retval)2264 recvmsg_x(proc_ref_t p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2265 {
2266 	int error = EOPNOTSUPP;
2267 	user_msghdr_x_ptr_t user_msg_x = NULL;
2268 	recv_msg_elem_ptr_t recv_msg_array = NULL;
2269 	socket_ref_t so;
2270 	user_ssize_t len_before = 0, len_after;
2271 	int need_drop = 0;
2272 	size_t size_of_msghdr;
2273 	void_ptr_t umsgp = NULL;
2274 	u_int i;
2275 	u_int uiocnt;
2276 
2277 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2278 
2279 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2280 
2281 	size_of_msghdr = is_p_64bit_process ?
2282 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2283 
2284 	error = file_socket(uap->s, &so);
2285 	if (error) {
2286 		goto out;
2287 	}
2288 	need_drop = 1;
2289 	if (so == NULL) {
2290 		error = EBADF;
2291 		goto out;
2292 	}
2293 	/*
2294 	 * Support only a subset of message flags
2295 	 */
2296 	if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |  MSG_NBIO)) {
2297 		return EOPNOTSUPP;
2298 	}
2299 	/*
2300 	 * Input parameter range check
2301 	 */
2302 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2303 		error = EINVAL;
2304 		goto out;
2305 	}
2306 	if (uap->cnt > somaxrecvmsgx) {
2307 		uap->cnt = somaxrecvmsgx;
2308 	}
2309 
2310 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
2311 	    Z_WAITOK | Z_ZERO);
2312 	if (user_msg_x == NULL) {
2313 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
2314 		error = ENOMEM;
2315 		goto out;
2316 	}
2317 	recv_msg_array = alloc_recv_msg_array(uap->cnt);
2318 	if (recv_msg_array == NULL) {
2319 		DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2320 		error = ENOMEM;
2321 		goto out;
2322 	}
2323 
2324 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2325 	if (umsgp == NULL) {
2326 		DBG_PRINTF("%s umsgp alloc failed\n", __func__);
2327 		error = ENOMEM;
2328 		goto out;
2329 	}
2330 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2331 	if (error) {
2332 		DBG_PRINTF("%s copyin() failed\n", __func__);
2333 		goto out;
2334 	}
2335 	error = internalize_recv_msghdr_array(umsgp,
2336 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2337 	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2338 	if (error) {
2339 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2340 		goto out;
2341 	}
2342 	/*
2343 	 * Make sure the size of each message iovec and
2344 	 * the aggregate size of all the iovec is valid
2345 	 */
2346 	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2347 		error = EINVAL;
2348 		goto out;
2349 	}
2350 	/*
2351 	 * Sanity check on passed arguments
2352 	 */
2353 	for (i = 0; i < uap->cnt; i++) {
2354 		struct user_msghdr_x *mp = user_msg_x + i;
2355 
2356 		if (mp->msg_flags != 0) {
2357 			error = EINVAL;
2358 			goto out;
2359 		}
2360 	}
2361 #if CONFIG_MACF_SOCKET_SUBSET
2362 	/*
2363 	 * We check the state without holding the socket lock;
2364 	 * if a race condition occurs, it would simply result
2365 	 * in an extra call to the MAC check function.
2366 	 */
2367 	if (!(so->so_state & SS_DEFUNCT) &&
2368 	    !(so->so_state & SS_ISCONNECTED) &&
2369 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2370 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2371 		goto out;
2372 	}
2373 #endif /* MAC_SOCKET_SUBSET */
2374 
2375 	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2376 
2377 	if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2378 	    pru_soreceive_list_notsupp &&
2379 	    somaxrecvmsgx == 0) {
2380 		error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2381 		    recv_msg_array, uap->cnt, &uap->flags);
2382 	} else {
2383 		int flags = uap->flags;
2384 
2385 		for (i = 0; i < uap->cnt; i++) {
2386 			struct recv_msg_elem *recv_msg_elem;
2387 			uio_t auio;
2388 			sockaddr_ref_ref_t psa;
2389 			struct mbuf **controlp;
2390 
2391 			recv_msg_elem = recv_msg_array + i;
2392 			auio = recv_msg_elem->uio;
2393 
2394 			/*
2395 			 * Do not block if we got at least one packet
2396 			 */
2397 			if (i > 0) {
2398 				flags |= MSG_DONTWAIT;
2399 			}
2400 
2401 			psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2402 			    &recv_msg_elem->psa : NULL;
2403 			controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2404 			    &recv_msg_elem->controlp : NULL;
2405 
2406 			error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2407 			    auio, NULL, controlp, &flags);
2408 			if (error) {
2409 				break;
2410 			}
2411 			/*
2412 			 * We have some data
2413 			 */
2414 			recv_msg_elem->which |= SOCK_MSG_DATA;
2415 			/*
2416 			 * Set the messages flags for this packet
2417 			 */
2418 			flags &= ~MSG_DONTWAIT;
2419 			recv_msg_elem->flags = flags;
2420 			/*
2421 			 * Stop on partial copy
2422 			 */
2423 			if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2424 				break;
2425 			}
2426 		}
2427 	}
2428 
2429 	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2430 
2431 	if (error) {
2432 		if (len_after != len_before && (error == ERESTART ||
2433 		    error == EINTR || error == EWOULDBLOCK)) {
2434 			error = 0;
2435 		} else {
2436 			goto out;
2437 		}
2438 	}
2439 
2440 	uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2441 	    uap->cnt, user_msg_x, recv_msg_array, &error);
2442 	if (error != 0) {
2443 		goto out;
2444 	}
2445 
2446 	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2447 	if (error) {
2448 		DBG_PRINTF("%s copyout() failed\n", __func__);
2449 		goto out;
2450 	}
2451 	*retval = (int)(uiocnt);
2452 
2453 out:
2454 	if (need_drop) {
2455 		file_drop(uap->s);
2456 	}
2457 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
2458 	free_recv_msg_array(recv_msg_array, uap->cnt);
2459 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
2460 
2461 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2462 
2463 	return error;
2464 }
2465 
2466 /*
2467  * Returns:	0			Success
2468  *		EBADF
2469  *	file_socket:ENOTSOCK
2470  *	file_socket:EBADF
2471  *	soshutdown:EINVAL
2472  *	soshutdown:ENOTCONN
2473  *	soshutdown:EADDRNOTAVAIL[TCP]
2474  *	soshutdown:ENOBUFS[TCP]
2475  *	soshutdown:EMSGSIZE[TCP]
2476  *	soshutdown:EHOSTUNREACH[TCP]
2477  *	soshutdown:ENETUNREACH[TCP]
2478  *	soshutdown:ENETDOWN[TCP]
2479  *	soshutdown:ENOMEM[TCP]
2480  *	soshutdown:EACCES[TCP]
2481  *	soshutdown:EMSGSIZE[TCP]
2482  *	soshutdown:ENOBUFS[TCP]
2483  *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
2484  *	soshutdown:???			[other protocol families]
2485  */
2486 /* ARGSUSED */
2487 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)2488 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
2489     __unused int32_ref_t retval)
2490 {
2491 	socket_ref_t so;
2492 	int error;
2493 
2494 	AUDIT_ARG(fd, uap->s);
2495 	error = file_socket(uap->s, &so);
2496 	if (error) {
2497 		return error;
2498 	}
2499 	if (so == NULL) {
2500 		error = EBADF;
2501 		goto out;
2502 	}
2503 	error =  soshutdown((struct socket *)so, uap->how);
2504 out:
2505 	file_drop(uap->s);
2506 	return error;
2507 }
2508 
2509 /*
2510  * Returns:	0			Success
2511  *		EFAULT
2512  *		EINVAL
2513  *		EACCES			Mandatory Access Control failure
2514  *	file_socket:ENOTSOCK
2515  *	file_socket:EBADF
2516  *	sosetopt:EINVAL
2517  *	sosetopt:ENOPROTOOPT
2518  *	sosetopt:ENOBUFS
2519  *	sosetopt:EDOM
2520  *	sosetopt:EFAULT
2521  *	sosetopt:EOPNOTSUPP[AF_UNIX]
2522  *	sosetopt:???
2523  */
2524 /* ARGSUSED */
2525 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)2526 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
2527     __unused int32_ref_t retval)
2528 {
2529 	socket_ref_t so;
2530 	struct sockopt sopt;
2531 	int error;
2532 
2533 	AUDIT_ARG(fd, uap->s);
2534 	if (uap->val == 0 && uap->valsize != 0) {
2535 		return EFAULT;
2536 	}
2537 	/* No bounds checking on size (it's unsigned) */
2538 
2539 	error = file_socket(uap->s, &so);
2540 	if (error) {
2541 		return error;
2542 	}
2543 
2544 	sopt.sopt_dir = SOPT_SET;
2545 	sopt.sopt_level = uap->level;
2546 	sopt.sopt_name = uap->name;
2547 	sopt.sopt_val = uap->val;
2548 	sopt.sopt_valsize = uap->valsize;
2549 	sopt.sopt_p = p;
2550 
2551 	if (so == NULL) {
2552 		error = EINVAL;
2553 		goto out;
2554 	}
2555 #if CONFIG_MACF_SOCKET_SUBSET
2556 	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2557 	    &sopt)) != 0) {
2558 		goto out;
2559 	}
2560 #endif /* MAC_SOCKET_SUBSET */
2561 	error = sosetoptlock(so, &sopt, 1);     /* will lock socket */
2562 out:
2563 	file_drop(uap->s);
2564 	return error;
2565 }
2566 
2567 
2568 
2569 /*
2570  * Returns:	0			Success
2571  *		EINVAL
2572  *		EBADF
2573  *		EACCES			Mandatory Access Control failure
2574  *	copyin:EFAULT
2575  *	copyout:EFAULT
2576  *	file_socket:ENOTSOCK
2577  *	file_socket:EBADF
2578  *	sogetopt:???
2579  */
2580 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)2581 getsockopt(proc_ref_t p, struct getsockopt_args  *uap,
2582     __unused int32_ref_t retval)
2583 {
2584 	int             error;
2585 	socklen_t       valsize;
2586 	struct sockopt  sopt;
2587 	socket_ref_t so;
2588 
2589 	error = file_socket(uap->s, &so);
2590 	if (error) {
2591 		return error;
2592 	}
2593 	if (uap->val) {
2594 		error = copyin(uap->avalsize, (caddr_t)&valsize,
2595 		    sizeof(valsize));
2596 		if (error) {
2597 			goto out;
2598 		}
2599 		/* No bounds checking on size (it's unsigned) */
2600 	} else {
2601 		valsize = 0;
2602 	}
2603 	sopt.sopt_dir = SOPT_GET;
2604 	sopt.sopt_level = uap->level;
2605 	sopt.sopt_name = uap->name;
2606 	sopt.sopt_val = uap->val;
2607 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2608 	sopt.sopt_p = p;
2609 
2610 	if (so == NULL) {
2611 		error = EBADF;
2612 		goto out;
2613 	}
2614 #if CONFIG_MACF_SOCKET_SUBSET
2615 	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2616 	    &sopt)) != 0) {
2617 		goto out;
2618 	}
2619 #endif /* MAC_SOCKET_SUBSET */
2620 	error = sogetoptlock((struct socket *)so, &sopt, 1);    /* will lock */
2621 	if (error == 0) {
2622 		valsize = (socklen_t)sopt.sopt_valsize;
2623 		error = copyout((caddr_t)&valsize, uap->avalsize,
2624 		    sizeof(valsize));
2625 	}
2626 out:
2627 	file_drop(uap->s);
2628 	return error;
2629 }
2630 
2631 
2632 /*
2633  * Get socket name.
2634  *
2635  * Returns:	0			Success
2636  *		EBADF
2637  *	file_socket:ENOTSOCK
2638  *	file_socket:EBADF
2639  *	copyin:EFAULT
2640  *	copyout:EFAULT
2641  *	<pru_sockaddr>:ENOBUFS[TCP]
2642  *	<pru_sockaddr>:ECONNRESET[TCP]
2643  *	<pru_sockaddr>:EINVAL[AF_UNIX]
2644  *	<sf_getsockname>:???
2645  */
2646 /* ARGSUSED */
2647 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)2648 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
2649     __unused int32_ref_t retval)
2650 {
2651 	socket_ref_t so;
2652 	sockaddr_ref_t  sa;
2653 	socklen_t len;
2654 	socklen_t sa_len;
2655 	int error;
2656 
2657 	error = file_socket(uap->fdes, &so);
2658 	if (error) {
2659 		return error;
2660 	}
2661 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2662 	if (error) {
2663 		goto out;
2664 	}
2665 	if (so == NULL) {
2666 		error = EBADF;
2667 		goto out;
2668 	}
2669 	sa = 0;
2670 	socket_lock(so, 1);
2671 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2672 	if (error == 0) {
2673 		error = sflt_getsockname(so, &sa);
2674 		if (error == EJUSTRETURN) {
2675 			error = 0;
2676 		}
2677 	}
2678 	socket_unlock(so, 1);
2679 	if (error) {
2680 		goto bad;
2681 	}
2682 	if (sa == 0) {
2683 		len = 0;
2684 		goto gotnothing;
2685 	}
2686 
2687 	sa_len = sa->sa_len;
2688 	len = MIN(len, sa_len);
2689 	error = copyout((caddr_t)sa, uap->asa, len);
2690 	if (error) {
2691 		goto bad;
2692 	}
2693 	/* return the actual, untruncated address length */
2694 	len = sa_len;
2695 gotnothing:
2696 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2697 bad:
2698 	free_sockaddr(sa);
2699 out:
2700 	file_drop(uap->fdes);
2701 	return error;
2702 }
2703 
2704 /*
2705  * Get name of peer for connected socket.
2706  *
2707  * Returns:	0			Success
2708  *		EBADF
2709  *		EINVAL
2710  *		ENOTCONN
2711  *	file_socket:ENOTSOCK
2712  *	file_socket:EBADF
2713  *	copyin:EFAULT
2714  *	copyout:EFAULT
2715  *	<pru_peeraddr>:???
2716  *	<sf_getpeername>:???
2717  */
2718 /* ARGSUSED */
2719 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)2720 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
2721     __unused int32_ref_t retval)
2722 {
2723 	socket_ref_t so;
2724 	sockaddr_ref_t  sa;
2725 	socklen_t len;
2726 	socklen_t sa_len;
2727 	int error;
2728 
2729 	error = file_socket(uap->fdes, &so);
2730 	if (error) {
2731 		return error;
2732 	}
2733 	if (so == NULL) {
2734 		error = EBADF;
2735 		goto out;
2736 	}
2737 
2738 	socket_lock(so, 1);
2739 
2740 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2741 	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2742 		/* the socket has been shutdown, no more getpeername's */
2743 		socket_unlock(so, 1);
2744 		error = EINVAL;
2745 		goto out;
2746 	}
2747 
2748 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2749 		socket_unlock(so, 1);
2750 		error = ENOTCONN;
2751 		goto out;
2752 	}
2753 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2754 	if (error) {
2755 		socket_unlock(so, 1);
2756 		goto out;
2757 	}
2758 	sa = 0;
2759 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2760 	if (error == 0) {
2761 		error = sflt_getpeername(so, &sa);
2762 		if (error == EJUSTRETURN) {
2763 			error = 0;
2764 		}
2765 	}
2766 	socket_unlock(so, 1);
2767 	if (error) {
2768 		goto bad;
2769 	}
2770 	if (sa == 0) {
2771 		len = 0;
2772 		goto gotnothing;
2773 	}
2774 	sa_len = sa->sa_len;
2775 	len = MIN(len, sa_len);
2776 	error = copyout(sa, uap->asa, len);
2777 	if (error) {
2778 		goto bad;
2779 	}
2780 	/* return the actual, untruncated address length */
2781 	len = sa_len;
2782 gotnothing:
2783 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2784 bad:
2785 	free_sockaddr(sa);
2786 out:
2787 	file_drop(uap->fdes);
2788 	return error;
2789 }
2790 
2791 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)2792 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
2793 {
2794 	sockaddr_ref_t sa;
2795 	struct mbuf *m;
2796 	int error;
2797 	socklen_t alloc_buflen = buflen;
2798 
2799 	if (buflen > INT_MAX / 2) {
2800 		return EINVAL;
2801 	}
2802 	if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
2803 	    buflen < offsetof(struct sockaddr, sa_data[0]))) {
2804 		return EINVAL;
2805 	}
2806 	if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
2807 		return EINVAL;
2808 	}
2809 
2810 #ifdef __LP64__
2811 	/*
2812 	 * The fd's in the buffer must expand to be pointers, thus we need twice
2813 	 * as much space
2814 	 */
2815 	if (type == MT_CONTROL) {
2816 		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2817 		    sizeof(struct cmsghdr);
2818 	}
2819 #endif
2820 	if (alloc_buflen > MLEN) {
2821 		if (type == MT_SONAME && alloc_buflen <= 112) {
2822 			alloc_buflen = MLEN;    /* unix domain compat. hack */
2823 		} else if (alloc_buflen > MCLBYTES) {
2824 			return EINVAL;
2825 		}
2826 	}
2827 	m = m_get(M_WAIT, type);
2828 	if (m == NULL) {
2829 		return ENOBUFS;
2830 	}
2831 	if (alloc_buflen > MLEN) {
2832 		MCLGET(m, M_WAIT);
2833 		if ((m->m_flags & M_EXT) == 0) {
2834 			m_free(m);
2835 			return ENOBUFS;
2836 		}
2837 	}
2838 	/*
2839 	 * K64: We still copyin the original buflen because it gets expanded
2840 	 * later and we lie about the size of the mbuf because it only affects
2841 	 * unp_* functions
2842 	 */
2843 	m->m_len = buflen;
2844 	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2845 	if (error) {
2846 		(void) m_free(m);
2847 	} else {
2848 		*mp = m;
2849 		if (type == MT_SONAME) {
2850 			VERIFY(buflen <= SOCK_MAXADDRLEN);
2851 			sa = mtod(m, sockaddr_ref_t);
2852 			sa->sa_len = (__uint8_t)buflen;
2853 		}
2854 	}
2855 	return error;
2856 }
2857 
2858 /*
2859  * Given a user_addr_t of length len, allocate and fill out a *sa.
2860  *
2861  * Returns:	0			Success
2862  *		ENAMETOOLONG		Filename too long
2863  *		EINVAL			Invalid argument
2864  *		ENOMEM			Not enough space
2865  *		copyin:EFAULT		Bad address
2866  */
2867 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2868 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
2869     size_t len, boolean_t translate_unspec)
2870 {
2871 	sockaddr_ref_t  sa;
2872 	int error;
2873 
2874 	if (len > SOCK_MAXADDRLEN) {
2875 		return ENAMETOOLONG;
2876 	}
2877 
2878 	if (len < offsetof(struct sockaddr, sa_data[0])) {
2879 		return EINVAL;
2880 	}
2881 
2882 	sa = (sockaddr_ref_t)alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
2883 
2884 	error = copyin(uaddr, (caddr_t)sa, len);
2885 	if (error) {
2886 		free_sockaddr(sa);
2887 	} else {
2888 		/*
2889 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2890 		 * legacy applications that use AF_UNSPEC (0).  On all other
2891 		 * sockets we leave it unchanged and let the lower layer
2892 		 * handle it.
2893 		 */
2894 		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2895 		    SOCK_CHECK_DOM(so, PF_INET) &&
2896 		    len == sizeof(struct sockaddr_in)) {
2897 			sa->sa_family = AF_INET;
2898 		}
2899 		VERIFY(len <= SOCK_MAXADDRLEN);
2900 		sa = *&sa;
2901 		sa->sa_len = (__uint8_t)len;
2902 		*namp = sa;
2903 	}
2904 	return error;
2905 }
2906 
2907 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2908 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
2909     user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2910 {
2911 	int error;
2912 
2913 	if (ss == NULL || uaddr == USER_ADDR_NULL ||
2914 	    len < offsetof(struct sockaddr, sa_data[0])) {
2915 		return EINVAL;
2916 	}
2917 
2918 	/*
2919 	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2920 	 * so the check here is inclusive.
2921 	 */
2922 	if (len > sizeof(*ss)) {
2923 		return ENAMETOOLONG;
2924 	}
2925 
2926 	bzero(ss, sizeof(*ss));
2927 	error = copyin(uaddr, (caddr_t)ss, len);
2928 	if (error == 0) {
2929 		/*
2930 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2931 		 * legacy applications that use AF_UNSPEC (0).  On all other
2932 		 * sockets we leave it unchanged and let the lower layer
2933 		 * handle it.
2934 		 */
2935 		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2936 		    SOCK_CHECK_DOM(so, PF_INET) &&
2937 		    len == sizeof(struct sockaddr_in)) {
2938 			ss->ss_family = AF_INET;
2939 		}
2940 
2941 		ss->ss_len = (__uint8_t)len;
2942 	}
2943 	return error;
2944 }
2945 
2946 int
internalize_user_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,uio_ref_ptr_t uiop)2947 internalize_user_msghdr_array(const void_ptr_t src, int spacetype, int direction,
2948     u_int count, user_msghdr_x_ptr_t dst, uio_ref_ptr_t uiop)
2949 {
2950 	int error = 0;
2951 	u_int i;
2952 	u_int namecnt = 0;
2953 	u_int ctlcnt = 0;
2954 
2955 	for (i = 0; i < count; i++) {
2956 		uio_t auio;
2957 		struct user_iovec *iovp;
2958 		struct user_msghdr_x *user_msg = dst + i;
2959 
2960 		if (spacetype == UIO_USERSPACE64) {
2961 			const struct user64_msghdr_x *msghdr64;
2962 
2963 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2964 
2965 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2966 			user_msg->msg_namelen = msghdr64->msg_namelen;
2967 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2968 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
2969 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2970 			user_msg->msg_controllen = msghdr64->msg_controllen;
2971 			user_msg->msg_flags = msghdr64->msg_flags;
2972 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2973 		} else {
2974 			const struct user32_msghdr_x *msghdr32;
2975 
2976 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2977 
2978 			user_msg->msg_name = msghdr32->msg_name;
2979 			user_msg->msg_namelen = msghdr32->msg_namelen;
2980 			user_msg->msg_iov = msghdr32->msg_iov;
2981 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
2982 			user_msg->msg_control = msghdr32->msg_control;
2983 			user_msg->msg_controllen = msghdr32->msg_controllen;
2984 			user_msg->msg_flags = msghdr32->msg_flags;
2985 			user_msg->msg_datalen = msghdr32->msg_datalen;
2986 		}
2987 
2988 		if (user_msg->msg_iovlen <= 0 ||
2989 		    user_msg->msg_iovlen > UIO_MAXIOV) {
2990 			error = EMSGSIZE;
2991 			goto done;
2992 		}
2993 		auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2994 		    direction);
2995 		if (auio == NULL) {
2996 			error = ENOMEM;
2997 			goto done;
2998 		}
2999 		uiop[i] = auio;
3000 
3001 		iovp = uio_iovsaddr(auio);
3002 		if (iovp == NULL) {
3003 			error = ENOMEM;
3004 			goto done;
3005 		}
3006 		error = copyin_user_iovec_array(user_msg->msg_iov,
3007 		    spacetype, user_msg->msg_iovlen, iovp);
3008 		if (error) {
3009 			goto done;
3010 		}
3011 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3012 
3013 		error = uio_calculateresid(auio);
3014 		if (error) {
3015 			goto done;
3016 		}
3017 		user_msg->msg_datalen = uio_resid(auio);
3018 
3019 		if (user_msg->msg_name && user_msg->msg_namelen) {
3020 			namecnt++;
3021 		}
3022 		if (user_msg->msg_control && user_msg->msg_controllen) {
3023 			ctlcnt++;
3024 		}
3025 	}
3026 done:
3027 
3028 	return error;
3029 }
3030 
3031 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3032 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3033     u_int count, user_msghdr_x_ptr_t dst,
3034     recv_msg_elem_ptr_t recv_msg_array)
3035 {
3036 	int error = 0;
3037 	u_int i;
3038 
3039 	for (i = 0; i < count; i++) {
3040 		struct user_iovec *iovp;
3041 		struct user_msghdr_x *user_msg = dst + i;
3042 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3043 
3044 		if (spacetype == UIO_USERSPACE64) {
3045 			const struct user64_msghdr_x *msghdr64;
3046 
3047 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3048 
3049 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3050 			user_msg->msg_namelen = msghdr64->msg_namelen;
3051 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3052 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3053 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3054 			user_msg->msg_controllen = msghdr64->msg_controllen;
3055 			user_msg->msg_flags = msghdr64->msg_flags;
3056 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3057 		} else {
3058 			const struct user32_msghdr_x *msghdr32;
3059 
3060 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3061 
3062 			user_msg->msg_name = msghdr32->msg_name;
3063 			user_msg->msg_namelen = msghdr32->msg_namelen;
3064 			user_msg->msg_iov = msghdr32->msg_iov;
3065 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3066 			user_msg->msg_control = msghdr32->msg_control;
3067 			user_msg->msg_controllen = msghdr32->msg_controllen;
3068 			user_msg->msg_flags = msghdr32->msg_flags;
3069 			user_msg->msg_datalen = msghdr32->msg_datalen;
3070 		}
3071 
3072 		if (user_msg->msg_iovlen <= 0 ||
3073 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3074 			error = EMSGSIZE;
3075 			goto done;
3076 		}
3077 		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3078 		    spacetype, direction);
3079 		if (recv_msg_elem->uio == NULL) {
3080 			error = ENOMEM;
3081 			goto done;
3082 		}
3083 
3084 		iovp = uio_iovsaddr(recv_msg_elem->uio);
3085 		if (iovp == NULL) {
3086 			error = ENOMEM;
3087 			goto done;
3088 		}
3089 		error = copyin_user_iovec_array(user_msg->msg_iov,
3090 		    spacetype, user_msg->msg_iovlen, iovp);
3091 		if (error) {
3092 			goto done;
3093 		}
3094 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3095 
3096 		error = uio_calculateresid(recv_msg_elem->uio);
3097 		if (error) {
3098 			goto done;
3099 		}
3100 		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3101 
3102 		if (user_msg->msg_name && user_msg->msg_namelen) {
3103 			recv_msg_elem->which |= SOCK_MSG_SA;
3104 		}
3105 		if (user_msg->msg_control && user_msg->msg_controllen) {
3106 			recv_msg_elem->which |= SOCK_MSG_CONTROL;
3107 		}
3108 	}
3109 done:
3110 
3111 	return error;
3112 }
3113 
3114 void
externalize_user_msghdr_array(void_ptr_t dst,int spacetype,int direction,u_int count,const user_msghdr_x_ptr_t src,uio_ref_ptr_t uiop)3115 externalize_user_msghdr_array(void_ptr_t dst, int spacetype, int direction,
3116     u_int count, const user_msghdr_x_ptr_t src, uio_ref_ptr_t uiop)
3117 {
3118 #pragma unused(direction)
3119 	u_int i;
3120 
3121 	for (i = 0; i < count; i++) {
3122 		const struct user_msghdr_x *user_msg = src + i;
3123 		uio_t auio = uiop[i];
3124 		user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3125 
3126 		if (spacetype == UIO_USERSPACE64) {
3127 			struct user64_msghdr_x *msghdr64;
3128 
3129 			msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3130 
3131 			msghdr64->msg_flags = user_msg->msg_flags;
3132 			msghdr64->msg_datalen = len;
3133 		} else {
3134 			struct user32_msghdr_x *msghdr32;
3135 
3136 			msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3137 
3138 			msghdr32->msg_flags = user_msg->msg_flags;
3139 			msghdr32->msg_datalen = (user32_size_t)len;
3140 		}
3141 	}
3142 }
3143 
3144 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3145 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3146     u_int count, user_msghdr_x_ptr_t src,
3147     recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3148 {
3149 	u_int i;
3150 	u_int retcnt = 0;
3151 	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3152 
3153 	*ret_error = 0;
3154 
3155 	for (i = 0; i < count; i++) {
3156 		struct user_msghdr_x *user_msg = src + i;
3157 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3158 		user_ssize_t len = 0;
3159 		int error;
3160 
3161 		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3162 
3163 		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3164 			retcnt++;
3165 
3166 			if (recv_msg_elem->which & SOCK_MSG_SA) {
3167 				error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3168 				    &user_msg->msg_namelen);
3169 				if (error != 0) {
3170 					*ret_error = error;
3171 					return 0;
3172 				}
3173 			}
3174 			if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3175 				error = copyout_control(p, recv_msg_elem->controlp,
3176 				    user_msg->msg_control, &user_msg->msg_controllen,
3177 				    &recv_msg_elem->flags, so);
3178 				if (error != 0) {
3179 					*ret_error = error;
3180 					return 0;
3181 				}
3182 			}
3183 		}
3184 
3185 		if (spacetype == UIO_USERSPACE64) {
3186 			struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3187 
3188 			msghdr64->msg_namelen = user_msg->msg_namelen;
3189 			msghdr64->msg_controllen = user_msg->msg_controllen;
3190 			msghdr64->msg_flags = recv_msg_elem->flags;
3191 			msghdr64->msg_datalen = len;
3192 		} else {
3193 			struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3194 
3195 			msghdr32->msg_namelen = user_msg->msg_namelen;
3196 			msghdr32->msg_controllen = user_msg->msg_controllen;
3197 			msghdr32->msg_flags = recv_msg_elem->flags;
3198 			msghdr32->msg_datalen = (user32_size_t)len;
3199 		}
3200 	}
3201 	return retcnt;
3202 }
3203 
3204 void
free_uio_array(uio_ref_ptr_t uiop,u_int count)3205 free_uio_array(uio_ref_ptr_t uiop, u_int count)
3206 {
3207 	u_int i;
3208 
3209 	for (i = 0; i < count; i++) {
3210 		if (uiop[i] != NULL) {
3211 			uio_free(uiop[i]);
3212 		}
3213 	}
3214 }
3215 
3216 /* Extern linkage requires using __counted_by instead of bptr */
3217 __private_extern__ user_ssize_t
uio_array_resid(uio_ref_t * __counted_by (count)uiop,u_int count)3218 uio_array_resid(uio_ref_t * __counted_by(count)uiop, u_int count)
3219 {
3220 	user_ssize_t len = 0;
3221 	u_int i;
3222 
3223 	for (i = 0; i < count; i++) {
3224 		struct uio *auio = uiop[i];
3225 
3226 		if (auio != NULL) {
3227 			len += uio_resid(auio);
3228 		}
3229 	}
3230 	return len;
3231 }
3232 
3233 static boolean_t
uio_array_is_valid(uio_ref_ptr_t uiop,u_int count)3234 uio_array_is_valid(uio_ref_ptr_t uiop, u_int count)
3235 {
3236 	user_ssize_t len = 0;
3237 	u_int i;
3238 
3239 	for (i = 0; i < count; i++) {
3240 		struct uio *auio = uiop[i];
3241 
3242 		if (auio != NULL) {
3243 			user_ssize_t resid = uio_resid(auio);
3244 
3245 			/*
3246 			 * Sanity check on the validity of the iovec:
3247 			 * no point of going over sb_max
3248 			 */
3249 			if (resid < 0 || resid > (user_ssize_t)sb_max) {
3250 				return false;
3251 			}
3252 
3253 			len += resid;
3254 			if (len < 0 || len > (user_ssize_t)sb_max) {
3255 				return false;
3256 			}
3257 		}
3258 	}
3259 	return true;
3260 }
3261 
3262 
3263 recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)3264 alloc_recv_msg_array(u_int count)
3265 {
3266 	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3267 }
3268 
3269 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3270 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3271 {
3272 	if (recv_msg_array == NULL) {
3273 		return;
3274 	}
3275 	for (uint32_t i = 0; i < count; i++) {
3276 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3277 
3278 		if (recv_msg_elem->uio != NULL) {
3279 			uio_free(recv_msg_elem->uio);
3280 		}
3281 		free_sockaddr(recv_msg_elem->psa);
3282 		if (recv_msg_elem->controlp != NULL) {
3283 			m_freem(recv_msg_elem->controlp);
3284 		}
3285 	}
3286 	kfree_type(struct recv_msg_elem, count, recv_msg_array);
3287 }
3288 
3289 
3290 /* Extern linkage requires using __counted_by instead of bptr */
3291 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3292 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3293 {
3294 	user_ssize_t len = 0;
3295 	u_int i;
3296 
3297 	for (i = 0; i < count; i++) {
3298 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3299 
3300 		if (recv_msg_elem->uio != NULL) {
3301 			len += uio_resid(recv_msg_elem->uio);
3302 		}
3303 	}
3304 	return len;
3305 }
3306 
3307 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)3308 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3309 {
3310 	user_ssize_t len = 0;
3311 	u_int i;
3312 
3313 	for (i = 0; i < count; i++) {
3314 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3315 
3316 		if (recv_msg_elem->uio != NULL) {
3317 			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3318 
3319 			/*
3320 			 * Sanity check on the validity of the iovec:
3321 			 * no point of going over sb_max
3322 			 */
3323 			if (resid < 0 || (u_int32_t)resid > sb_max) {
3324 				return 0;
3325 			}
3326 
3327 			len += resid;
3328 			if (len < 0 || (u_int32_t)len > sb_max) {
3329 				return 0;
3330 			}
3331 		}
3332 	}
3333 	return 1;
3334 }
3335 
3336 #if SENDFILE
3337 
/*
 * Initial mbuf count requested per sendfile() pass; also sizes the
 * on-stack uio buffer used for the file read.
 */
#define SFUIOBUFS 64

/*
 * Number of clusters of a given size needed to hold n bytes (rounding
 * up).  n must be non-zero.
 */
#define HOWMANY_16K(n)  (1 + (((unsigned int)(n) - 1) >> M16KCLSHIFT))
#define HOWMANY_4K(n)   (1 + (((unsigned int)(n) - 1) >> MBIGCLSHIFT))

/* Most bytes moved in a single pass: SFUIOBUFS pages */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* The same limit expressed as a count of 16K and of 4K clusters */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3350 
3351 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)3352 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3353     mbuf_ref_ref_t m, boolean_t jumbocl)
3354 {
3355 	unsigned int needed;
3356 
3357 	if (pktlen == 0) {
3358 		panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3359 	}
3360 
3361 	/*
3362 	 * Try to allocate for the whole thing.  Since we want full control
3363 	 * over the buffer size and be able to accept partial result, we can't
3364 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
3365 	 */
3366 	*m = NULL;
3367 	if (pktlen > MBIGCLBYTES && jumbocl) {
3368 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3369 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3370 	}
3371 	if (*m == NULL) {
3372 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3373 		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3374 	}
3375 
3376 	/*
3377 	 * Our previous attempt(s) at allocation had failed; the system
3378 	 * may be short on mbufs, and we want to block until they are
3379 	 * available.  This time, ask just for 1 mbuf and don't return
3380 	 * until we get it.
3381 	 */
3382 	if (*m == NULL) {
3383 		needed = 1;
3384 		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3385 	}
3386 	if (*m == NULL) {
3387 		panic("%s: blocking allocation returned NULL", __func__);
3388 	}
3389 
3390 	*maxchunks = needed;
3391 }
3392 
3393 /*
3394  * sendfile(2).
3395  * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3396  *	 struct sf_hdtr *hdtr, int flags)
3397  *
3398  * Send a file specified by 'fd' and starting at 'offset' to a socket
3399  * specified by 's'. Send only '*nbytes' of the file or until EOF if
3400  * *nbytes == 0. Optionally add a header and/or trailer to the socket
3401  * output. If specified, write the total number of bytes sent into *nbytes.
3402  */
3403 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)3404 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
3405 {
3406 	fileproc_ref_t  fp;
3407 	vnode_ref_t  vp;
3408 	socket_ref_t so;
3409 	struct writev_nocancel_args nuap;
3410 	user_ssize_t writev_retval;
3411 	struct user_sf_hdtr user_hdtr;
3412 	struct user32_sf_hdtr user32_hdtr;
3413 	struct user64_sf_hdtr user64_hdtr;
3414 	off_t off, xfsize;
3415 	off_t nbytes = 0, sbytes = 0;
3416 	int error = 0;
3417 	size_t sizeof_hdtr;
3418 	off_t file_size;
3419 	struct vfs_context context = *vfs_context_current();
3420 
3421 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3422 
3423 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3424 	    0, 0, 0, 0);
3425 
3426 	AUDIT_ARG(fd, uap->fd);
3427 	AUDIT_ARG(value32, uap->s);
3428 
3429 	/*
3430 	 * Do argument checking. Must be a regular file in, stream
3431 	 * type and connected socket out, positive offset.
3432 	 */
3433 	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3434 		goto done;
3435 	}
3436 	if ((fp->f_flag & FREAD) == 0) {
3437 		error = EBADF;
3438 		goto done1;
3439 	}
3440 	if (vnode_isreg(vp) == 0) {
3441 		error = ENOTSUP;
3442 		goto done1;
3443 	}
3444 	error = file_socket(uap->s, &so);
3445 	if (error) {
3446 		goto done1;
3447 	}
3448 	if (so == NULL) {
3449 		error = EBADF;
3450 		goto done2;
3451 	}
3452 	if (so->so_type != SOCK_STREAM) {
3453 		error = EINVAL;
3454 		goto done2;
3455 	}
3456 	if ((so->so_state & SS_ISCONNECTED) == 0) {
3457 		error = ENOTCONN;
3458 		goto done2;
3459 	}
3460 	if (uap->offset < 0) {
3461 		error = EINVAL;
3462 		goto done2;
3463 	}
3464 	if (uap->nbytes == USER_ADDR_NULL) {
3465 		error = EINVAL;
3466 		goto done2;
3467 	}
3468 	if (uap->flags != 0) {
3469 		error = EINVAL;
3470 		goto done2;
3471 	}
3472 
3473 	context.vc_ucred = fp->fp_glob->fg_cred;
3474 
3475 #if CONFIG_MACF_SOCKET_SUBSET
3476 	/* JMM - fetch connected sockaddr? */
3477 	error = mac_socket_check_send(context.vc_ucred, so, NULL);
3478 	if (error) {
3479 		goto done2;
3480 	}
3481 #endif
3482 
3483 	/*
3484 	 * Get number of bytes to send
3485 	 * Should it applies to size of header and trailer?
3486 	 */
3487 	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3488 	if (error) {
3489 		goto done2;
3490 	}
3491 
3492 	/*
3493 	 * If specified, get the pointer to the sf_hdtr struct for
3494 	 * any headers/trailers.
3495 	 */
3496 	if (uap->hdtr != USER_ADDR_NULL) {
3497 		caddr_t hdtrp;
3498 
3499 		bzero(&user_hdtr, sizeof(user_hdtr));
3500 		if (is_p_64bit_process) {
3501 			hdtrp = (caddr_t)&user64_hdtr;
3502 			sizeof_hdtr = sizeof(user64_hdtr);
3503 		} else {
3504 			hdtrp = (caddr_t)&user32_hdtr;
3505 			sizeof_hdtr = sizeof(user32_hdtr);
3506 		}
3507 		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3508 		if (error) {
3509 			goto done2;
3510 		}
3511 		if (is_p_64bit_process) {
3512 			user_hdtr.headers = user64_hdtr.headers;
3513 			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3514 			user_hdtr.trailers = user64_hdtr.trailers;
3515 			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3516 		} else {
3517 			user_hdtr.headers = user32_hdtr.headers;
3518 			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3519 			user_hdtr.trailers = user32_hdtr.trailers;
3520 			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3521 		}
3522 
3523 		/*
3524 		 * Send any headers. Wimp out and use writev(2).
3525 		 */
3526 		if (user_hdtr.headers != USER_ADDR_NULL) {
3527 			bzero(&nuap, sizeof(struct writev_args));
3528 			nuap.fd = uap->s;
3529 			nuap.iovp = user_hdtr.headers;
3530 			nuap.iovcnt = user_hdtr.hdr_cnt;
3531 			error = writev_nocancel(p, &nuap, &writev_retval);
3532 			if (error) {
3533 				goto done2;
3534 			}
3535 			sbytes += writev_retval;
3536 		}
3537 	}
3538 
3539 	/*
3540 	 * Get the file size for 2 reasons:
3541 	 *  1. We don't want to allocate more mbufs than necessary
3542 	 *  2. We don't want to read past the end of file
3543 	 */
3544 	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3545 		goto done2;
3546 	}
3547 
3548 	/*
3549 	 * Simply read file data into a chain of mbufs that used with scatter
3550 	 * gather reads. We're not (yet?) setup to use zero copy external
3551 	 * mbufs that point to the file pages.
3552 	 */
3553 	socket_lock(so, 1);
3554 	error = sblock(&so->so_snd, SBL_WAIT);
3555 	if (error) {
3556 		socket_unlock(so, 1);
3557 		goto done2;
3558 	}
3559 	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3560 		mbuf_ref_t m0 = NULL;
3561 		mbuf_t  m;
3562 		unsigned int    nbufs = SFUIOBUFS, i;
3563 		uio_t   auio;
3564 		uio_stackbuf_t    uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3565 		size_t  uiolen;
3566 		user_ssize_t    rlen;
3567 		off_t   pgoff;
3568 		size_t  pktlen;
3569 		boolean_t jumbocl;
3570 
3571 		/*
3572 		 * Calculate the amount to transfer.
3573 		 * Align to round number of pages.
3574 		 * Not to exceed send socket buffer,
3575 		 * the EOF, or the passed in nbytes.
3576 		 */
3577 		xfsize = sbspace(&so->so_snd);
3578 
3579 		if (xfsize <= 0) {
3580 			if (so->so_state & SS_CANTSENDMORE) {
3581 				error = EPIPE;
3582 				goto done3;
3583 			} else if ((so->so_state & SS_NBIO)) {
3584 				error = EAGAIN;
3585 				goto done3;
3586 			} else {
3587 				xfsize = PAGE_SIZE;
3588 			}
3589 		}
3590 
3591 		if (xfsize > SENDFILE_MAX_BYTES) {
3592 			xfsize = SENDFILE_MAX_BYTES;
3593 		} else if (xfsize > PAGE_SIZE) {
3594 			xfsize = trunc_page(xfsize);
3595 		}
3596 		pgoff = off & PAGE_MASK_64;
3597 		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3598 			xfsize = PAGE_SIZE_64 - pgoff;
3599 		}
3600 		if (nbytes && xfsize > (nbytes - sbytes)) {
3601 			xfsize = nbytes - sbytes;
3602 		}
3603 		if (xfsize <= 0) {
3604 			break;
3605 		}
3606 		if (off + xfsize > file_size) {
3607 			xfsize = file_size - off;
3608 		}
3609 		if (xfsize <= 0) {
3610 			break;
3611 		}
3612 
3613 		/*
3614 		 * Attempt to use larger than system page-size clusters for
3615 		 * large writes only if there is a jumbo cluster pool and
3616 		 * if the socket is marked accordingly.
3617 		 */
3618 		jumbocl = sosendjcl && njcl > 0 &&
3619 		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3620 
3621 		socket_unlock(so, 0);
3622 		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3623 		pktlen = mbuf_pkthdr_maxlen(m0);
3624 		if (pktlen < (size_t)xfsize) {
3625 			xfsize = pktlen;
3626 		}
3627 
3628 		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3629 		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
3630 		if (auio == NULL) {
3631 			printf("sendfile failed. nbufs = %d. %s", nbufs,
3632 			    "File a radar related to rdar://10146739.\n");
3633 			mbuf_freem(m0);
3634 			error = ENXIO;
3635 			socket_lock(so, 0);
3636 			goto done3;
3637 		}
3638 
3639 		for (i = 0, m = m0, uiolen = 0;
3640 		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3641 		    i++, m = mbuf_next(m)) {
3642 			size_t mlen = mbuf_maxlen(m);
3643 
3644 			if (mlen + uiolen > (size_t)xfsize) {
3645 				mlen = xfsize - uiolen;
3646 			}
3647 			mbuf_setlen(m, mlen);
3648 			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3649 			    mlen);
3650 			uiolen += mlen;
3651 		}
3652 
3653 		if (xfsize != uio_resid(auio)) {
3654 			printf("sendfile: xfsize: %lld != uio_resid(auio): "
3655 			    "%lld\n", xfsize, (long long)uio_resid(auio));
3656 		}
3657 
3658 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3659 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3660 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3661 		error = fo_read(fp, auio, FOF_OFFSET, &context);
3662 		socket_lock(so, 0);
3663 		if (error != 0) {
3664 			if (uio_resid(auio) != xfsize && (error == ERESTART ||
3665 			    error == EINTR || error == EWOULDBLOCK)) {
3666 				error = 0;
3667 			} else {
3668 				mbuf_freem(m0);
3669 				goto done3;
3670 			}
3671 		}
3672 		xfsize -= uio_resid(auio);
3673 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3674 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3675 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3676 
3677 		if (xfsize == 0) {
3678 			// printf("sendfile: fo_read 0 bytes, EOF\n");
3679 			break;
3680 		}
3681 		if (xfsize + off > file_size) {
3682 			printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3683 			    "%lld\n", xfsize, off, file_size);
3684 		}
3685 		for (i = 0, m = m0, rlen = 0;
3686 		    i < nbufs && m != NULL && rlen < xfsize;
3687 		    i++, m = mbuf_next(m)) {
3688 			size_t mlen = mbuf_maxlen(m);
3689 
3690 			if (rlen + mlen > (size_t)xfsize) {
3691 				mlen = xfsize - rlen;
3692 			}
3693 			mbuf_setlen(m, mlen);
3694 
3695 			rlen += mlen;
3696 		}
3697 		mbuf_pkthdr_setlen(m0, xfsize);
3698 
3699 retry_space:
3700 		/*
3701 		 * Make sure that the socket is still able to take more data.
3702 		 * CANTSENDMORE being true usually means that the connection
3703 		 * was closed. so_error is true when an error was sensed after
3704 		 * a previous send.
3705 		 * The state is checked after the page mapping and buffer
3706 		 * allocation above since those operations may block and make
3707 		 * any socket checks stale. From this point forward, nothing
3708 		 * blocks before the pru_send (or more accurately, any blocking
3709 		 * results in a loop back to here to re-check).
3710 		 */
3711 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3712 			if (so->so_state & SS_CANTSENDMORE) {
3713 				error = EPIPE;
3714 			} else {
3715 				error = so->so_error;
3716 				so->so_error = 0;
3717 			}
3718 			m_freem(m0);
3719 			goto done3;
3720 		}
3721 		/*
3722 		 * Wait for socket space to become available. We do this just
3723 		 * after checking the connection state above in order to avoid
3724 		 * a race condition with sbwait().
3725 		 */
3726 		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3727 			if (so->so_state & SS_NBIO) {
3728 				m_freem(m0);
3729 				error = EAGAIN;
3730 				goto done3;
3731 			}
3732 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3733 			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3734 			error = sbwait(&so->so_snd);
3735 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3736 			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3737 			/*
3738 			 * An error from sbwait usually indicates that we've
3739 			 * been interrupted by a signal. If we've sent anything
3740 			 * then return bytes sent, otherwise return the error.
3741 			 */
3742 			if (error) {
3743 				m_freem(m0);
3744 				goto done3;
3745 			}
3746 			goto retry_space;
3747 		}
3748 
3749 		mbuf_ref_t  control = NULL;
3750 		{
3751 			/*
3752 			 * Socket filter processing
3753 			 */
3754 
3755 			error = sflt_data_out(so, NULL, &m0, &control, 0);
3756 			if (error) {
3757 				if (error == EJUSTRETURN) {
3758 					error = 0;
3759 					continue;
3760 				}
3761 				goto done3;
3762 			}
3763 			/*
3764 			 * End Socket filter processing
3765 			 */
3766 		}
3767 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3768 		    uap->s, 0, 0, 0, 0);
3769 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3770 		    NULL, control, p);
3771 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3772 		    uap->s, 0, 0, 0, 0);
3773 		if (error) {
3774 			goto done3;
3775 		}
3776 	}
3777 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3778 	/*
3779 	 * Send trailers. Wimp out and use writev(2).
3780 	 */
3781 	if (uap->hdtr != USER_ADDR_NULL &&
3782 	    user_hdtr.trailers != USER_ADDR_NULL) {
3783 		bzero(&nuap, sizeof(struct writev_args));
3784 		nuap.fd = uap->s;
3785 		nuap.iovp = user_hdtr.trailers;
3786 		nuap.iovcnt = user_hdtr.trl_cnt;
3787 		error = writev_nocancel(p, &nuap, &writev_retval);
3788 		if (error) {
3789 			goto done2;
3790 		}
3791 		sbytes += writev_retval;
3792 	}
3793 done2:
3794 	file_drop(uap->s);
3795 done1:
3796 	file_drop(uap->fd);
3797 done:
3798 	if (uap->nbytes != USER_ADDR_NULL) {
3799 		/* XXX this appears bogus for some early failure conditions */
3800 		copyout(&sbytes, uap->nbytes, sizeof(off_t));
3801 	}
3802 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3803 	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3804 	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3805 	return error;
3806 done3:
3807 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3808 	goto done2;
3809 }
3810 
3811 
3812 #endif /* SENDFILE */
3813