xref: /xnu-8792.61.2/bsd/kern/uipc_syscalls.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * sendfile(2) and related extensions:
33  * Copyright (c) 1998, David Greenman. All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64  */
65 /*
66  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67  * support for mandatory and extensible security protections.  This notice
68  * is included in support of clause 2.2 (b) of the Apple Public License,
69  * Version 2.0.
70  */
71 
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 
96 #include <security/audit/audit.h>
97 
98 #include <sys/kdebug.h>
99 #include <sys/sysproto.h>
100 #include <netinet/in.h>
101 #include <net/route.h>
102 #include <netinet/in_pcb.h>
103 
104 #include <os/log.h>
105 #include <os/ptrtools.h>
106 
107 #include <os/log.h>
108 
109 #if CONFIG_MACF_SOCKET_SUBSET
110 #include <security/mac_framework.h>
111 #endif /* MAC_SOCKET_SUBSET */
112 
/*
 * Shorthand for the flag word and file-operations vector that hang off a
 * fileproc's fp_glob, so code below can write fp->f_flag / fp->f_ops.
 */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops

/* Trace codes for this file, all built with NETDBG_CODE(DBG_NETSOCK, ...). */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG/DEVELOPMENT builds kernel addresses are passed through unchanged
 * and DBG_PRINTF maps to printf; on other builds addresses go through
 * VM_KERNEL_ADDRPERM and DBG_PRINTF expands to an empty statement.
 */
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
140 
141 /* Forward declarations for referenced types */
142 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
143 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
144 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
145 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
146 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
147 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
148 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
149 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
150 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
151 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
152 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
153 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
154 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
155 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
156 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
157 
158 static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
159     int, int32_ref_t );
160 static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
161     int32_ref_t);
162 static int connectit(socket_ref_t, sockaddr_ref_t);
163 static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
164     size_t, boolean_t);
165 static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
166     user_addr_t, size_t, boolean_t);
167 #if SENDFILE
168 static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
169     boolean_t);
170 #endif /* SENDFILE */
171 static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
172 static int connectitx(socket_ref_t, sockaddr_ref_t,
173     sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
174     sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
175 static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
176     int_ref_t);
177 static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);
178 
179 static int internalize_user_msghdr_array(const void_ptr_t, int, int,
180     u_int count, user_msghdr_x_ptr_t, uio_ref_ptr_t);
181 
182 static void externalize_user_msghdr_array(void_ptr_t, int, int, u_int count,
183     const user_msghdr_x_ptr_t, uio_ref_ptr_t);
184 
185 static void free_uio_array(uio_ref_ptr_t, u_int count);
186 static boolean_t uio_array_is_valid(uio_ref_ptr_t, u_int count);
187 static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
188     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
189 static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
190     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);
191 
192 static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
193 static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
194 static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
195 static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
196     socklen_ref_t, int_ref_t, socket_ref_t);
197 
198 SYSCTL_DECL(_kern_ipc);
199 
200 static u_int somaxsendmsgx = 100;
201 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
202     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
203 static u_int somaxrecvmsgx = 100;
204 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
205     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
206 
207 static u_int missingpktinfo = 0;
208 SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
209     CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");
210 
211 /*
212  * System call interface to the socket abstraction.
213  */
214 
215 extern const struct fileops socketops;
216 
217 /*
218  * Returns:	0			Success
219  *		EACCES			Mandatory Access Control failure
220  *	falloc:ENFILE
221  *	falloc:EMFILE
222  *	falloc:ENOMEM
223  *	socreate:EAFNOSUPPORT
224  *	socreate:EPROTOTYPE
225  *	socreate:EPROTONOSUPPORT
226  *	socreate:ENOBUFS
227  *	socreate:ENOMEM
228  *	socreate:???			[other protocol families, IPSEC]
229  */
230 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)231 socket(proc_ref_t p,
232     struct socket_args *uap,
233     int32_ref_t retval)
234 {
235 	return socket_common(p, uap->domain, uap->type, uap->protocol,
236 	           proc_selfpid(), retval, 0);
237 }
238 
239 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)240 socket_delegate(proc_ref_t p,
241     struct socket_delegate_args *uap,
242     int32_ref_t retval)
243 {
244 	return socket_common(p, uap->domain, uap->type, uap->protocol,
245 	           uap->epid, retval, 1);
246 }
247 
248 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)249 socket_common(proc_ref_t p,
250     int domain,
251     int type,
252     int protocol,
253     pid_t epid,
254     int32_ref_t retval,
255     int delegate)
256 {
257 	socket_ref_t so;
258 	fileproc_ref_t  fp;
259 	int fd, error;
260 
261 	AUDIT_ARG(socket, domain, type, protocol);
262 #if CONFIG_MACF_SOCKET_SUBSET
263 	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
264 	    type, protocol)) != 0) {
265 		return error;
266 	}
267 #endif /* MAC_SOCKET_SUBSET */
268 
269 	if (delegate) {
270 		error = priv_check_cred(kauth_cred_get(),
271 		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
272 		if (error) {
273 			return EACCES;
274 		}
275 	}
276 
277 	error = falloc(p, &fp, &fd, vfs_context_current());
278 	if (error) {
279 		return error;
280 	}
281 	fp->f_flag = FREAD | FWRITE;
282 	fp->f_ops = &socketops;
283 
284 	if (delegate) {
285 		error = socreate_delegate(domain, &so, type, protocol, epid);
286 	} else {
287 		error = socreate(domain, &so, type, protocol);
288 	}
289 
290 	if (error) {
291 		fp_free(p, fd, fp);
292 	} else {
293 		fp_set_data(fp, so);
294 
295 		proc_fdlock(p);
296 		procfdtbl_releasefd(p, fd, NULL);
297 
298 		if (ENTR_SHOULDTRACE) {
299 			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
300 			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
301 		}
302 		fp_drop(p, fd, fp, 1);
303 		proc_fdunlock(p);
304 
305 		*retval = fd;
306 	}
307 	return error;
308 }
309 
310 /*
311  * Returns:	0			Success
312  *		EDESTADDRREQ		Destination address required
313  *		EBADF			Bad file descriptor
314  *		EACCES			Mandatory Access Control failure
315  *	file_socket:ENOTSOCK
316  *	file_socket:EBADF
317  *	getsockaddr:ENAMETOOLONG	Filename too long
318  *	getsockaddr:EINVAL		Invalid argument
319  *	getsockaddr:ENOMEM		Not enough space
320  *	getsockaddr:EFAULT		Bad address
321  *	sobindlock:???
322  */
323 /* ARGSUSED */
324 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)325 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
326 {
327 	struct sockaddr_storage ss;
328 	sockaddr_ref_t  sa = NULL;
329 	socket_ref_t so;
330 	boolean_t want_free = TRUE;
331 	int error;
332 
333 	AUDIT_ARG(fd, uap->s);
334 	error = file_socket(uap->s, &so);
335 	if (error != 0) {
336 		return error;
337 	}
338 	if (so == NULL) {
339 		error = EBADF;
340 		goto out;
341 	}
342 	if (uap->name == USER_ADDR_NULL) {
343 		error = EDESTADDRREQ;
344 		goto out;
345 	}
346 	if (uap->namelen > sizeof(ss)) {
347 		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
348 	} else {
349 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
350 		if (error == 0) {
351 			sa = (sockaddr_ref_t)&ss;
352 			want_free = FALSE;
353 		}
354 	}
355 	if (error != 0) {
356 		goto out;
357 	}
358 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
359 #if CONFIG_MACF_SOCKET_SUBSET
360 	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
361 	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
362 		error = sobindlock(so, sa, 1);  /* will lock socket */
363 	}
364 #else
365 	error = sobindlock(so, sa, 1);          /* will lock socket */
366 #endif /* MAC_SOCKET_SUBSET */
367 	if (want_free) {
368 		free_sockaddr(sa);
369 	}
370 out:
371 	file_drop(uap->s);
372 	return error;
373 }
374 
375 /*
376  * Returns:	0			Success
377  *		EBADF
378  *		EACCES			Mandatory Access Control failure
379  *	file_socket:ENOTSOCK
380  *	file_socket:EBADF
381  *	solisten:EINVAL
382  *	solisten:EOPNOTSUPP
383  *	solisten:???
384  */
385 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)386 listen(__unused proc_ref_t p, struct listen_args *uap,
387     __unused int32_ref_t retval)
388 {
389 	int error;
390 	socket_ref_t so;
391 
392 	AUDIT_ARG(fd, uap->s);
393 	error = file_socket(uap->s, &so);
394 	if (error) {
395 		return error;
396 	}
397 	if (so != NULL)
398 #if CONFIG_MACF_SOCKET_SUBSET
399 	{
400 		error = mac_socket_check_listen(kauth_cred_get(), so);
401 		if (error == 0) {
402 			error = solisten(so, uap->backlog);
403 		}
404 	}
405 #else
406 	{ error = solisten(so, uap->backlog);}
407 #endif /* MAC_SOCKET_SUBSET */
408 	else {
409 		error = EBADF;
410 	}
411 
412 	file_drop(uap->s);
413 	return error;
414 }
415 
416 /*
417  * Returns:	fp_get_ftype:EBADF	Bad file descriptor
418  *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
419  *		:EFAULT			Bad address on copyin/copyout
420  *		:EBADF			Bad file descriptor
421  *		:EOPNOTSUPP		Operation not supported on socket
422  *		:EINVAL			Invalid argument
423  *		:EWOULDBLOCK		Operation would block
424  *		:ECONNABORTED		Connection aborted
425  *		:EINTR			Interrupted function
426  *		:EACCES			Mandatory Access Control failure
427  *		falloc:ENFILE		Too many files open in system
428  *		falloc:EMFILE		Too many open files
429  *		falloc:ENOMEM		Not enough space
430  *		0			Success
431  */
432 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)433 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
434     int32_ref_t retval)
435 {
436 	fileproc_ref_t  fp;
437 	sockaddr_ref_t  sa = NULL;
438 	socklen_t namelen;
439 	int error;
440 	socket_ref_t  head;
441 	socket_ref_t so = NULL;
442 	lck_mtx_t *mutex_held;
443 	int fd = uap->s;
444 	int newfd;
445 	unsigned int fflag;
446 	int dosocklock = 0;
447 
448 	*retval = -1;
449 
450 	AUDIT_ARG(fd, uap->s);
451 
452 	if (uap->name) {
453 		error = copyin(uap->anamelen, (caddr_t)&namelen,
454 		    sizeof(socklen_t));
455 		if (error) {
456 			return error;
457 		}
458 	}
459 	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
460 	if (error) {
461 		return error;
462 	}
463 	head = (struct socket *)fp_get_data(fp);
464 
465 #if CONFIG_MACF_SOCKET_SUBSET
466 	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
467 		goto out;
468 	}
469 #endif /* MAC_SOCKET_SUBSET */
470 
471 	socket_lock(head, 1);
472 
473 	if (head->so_proto->pr_getlock != NULL) {
474 		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
475 		dosocklock = 1;
476 	} else {
477 		mutex_held = head->so_proto->pr_domain->dom_mtx;
478 		dosocklock = 0;
479 	}
480 
481 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
482 		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
483 			error = EOPNOTSUPP;
484 		} else {
485 			/* POSIX: The socket is not accepting connections */
486 			error = EINVAL;
487 		}
488 		socket_unlock(head, 1);
489 		goto out;
490 	}
491 check_again:
492 	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
493 		socket_unlock(head, 1);
494 		error = EWOULDBLOCK;
495 		goto out;
496 	}
497 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
498 		if (head->so_state & SS_CANTRCVMORE) {
499 			head->so_error = ECONNABORTED;
500 			break;
501 		}
502 		if (head->so_usecount < 1) {
503 			panic("accept: head=%p refcount=%d", head,
504 			    head->so_usecount);
505 		}
506 		error = msleep((caddr_t)&head->so_timeo, mutex_held,
507 		    PSOCK | PCATCH, "accept", 0);
508 		if (head->so_usecount < 1) {
509 			panic("accept: 2 head=%p refcount=%d", head,
510 			    head->so_usecount);
511 		}
512 		if ((head->so_state & SS_DRAINING)) {
513 			error = ECONNABORTED;
514 		}
515 		if (error) {
516 			socket_unlock(head, 1);
517 			goto out;
518 		}
519 	}
520 	if (head->so_error) {
521 		error = head->so_error;
522 		head->so_error = 0;
523 		socket_unlock(head, 1);
524 		goto out;
525 	}
526 
527 	/*
528 	 * At this point we know that there is at least one connection
529 	 * ready to be accepted. Remove it from the queue prior to
530 	 * allocating the file descriptor for it since falloc() may
531 	 * block allowing another process to accept the connection
532 	 * instead.
533 	 */
534 	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
535 
536 	so_acquire_accept_list(head, NULL);
537 	if (TAILQ_EMPTY(&head->so_comp)) {
538 		so_release_accept_list(head);
539 		goto check_again;
540 	}
541 
542 	so = TAILQ_FIRST(&head->so_comp);
543 	TAILQ_REMOVE(&head->so_comp, so, so_list);
544 	/*
545 	 * Acquire the lock of the new connection
546 	 * as we may be in the process of receiving
547 	 * a packet that may change its so_state
548 	 * (e.g.: a TCP FIN).
549 	 */
550 	if (dosocklock) {
551 		socket_lock(so, 0);
552 	}
553 	so->so_head = NULL;
554 	so->so_state &= ~SS_COMP;
555 	if (dosocklock) {
556 		socket_unlock(so, 0);
557 	}
558 	head->so_qlen--;
559 	so_release_accept_list(head);
560 
561 	/* unlock head to avoid deadlock with select, keep a ref on head */
562 	socket_unlock(head, 0);
563 
564 #if CONFIG_MACF_SOCKET_SUBSET
565 	/*
566 	 * Pass the pre-accepted socket to the MAC framework. This is
567 	 * cheaper than allocating a file descriptor for the socket,
568 	 * calling the protocol accept callback, and possibly freeing
569 	 * the file descriptor should the MAC check fails.
570 	 */
571 	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
572 		socket_lock(so, 1);
573 		so->so_state &= ~SS_NOFDREF;
574 		socket_unlock(so, 1);
575 		soclose(so);
576 		/* Drop reference on listening socket */
577 		sodereference(head);
578 		goto out;
579 	}
580 #endif /* MAC_SOCKET_SUBSET */
581 
582 	/*
583 	 * Pass the pre-accepted socket to any interested socket filter(s).
584 	 * Upon failure, the socket would have been closed by the callee.
585 	 */
586 	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
587 		/* Drop reference on listening socket */
588 		sodereference(head);
589 		/* Propagate socket filter's error code to the caller */
590 		goto out;
591 	}
592 
593 	fflag = fp->f_flag;
594 	error = falloc(p, &fp, &newfd, vfs_context_current());
595 	if (error) {
596 		/*
597 		 * Probably ran out of file descriptors.
598 		 *
599 		 * <rdar://problem/8554930>
600 		 * Don't put this back on the socket like we used to, that
601 		 * just causes the client to spin. Drop the socket.
602 		 */
603 		socket_lock(so, 1);
604 		so->so_state &= ~SS_NOFDREF;
605 		socket_unlock(so, 1);
606 		soclose(so);
607 		sodereference(head);
608 		goto out;
609 	}
610 	*retval = newfd;
611 	fp->f_flag = fflag;
612 	fp->f_ops = &socketops;
613 	fp_set_data(fp, so);
614 
615 	socket_lock(head, 0);
616 	if (dosocklock) {
617 		socket_lock(so, 1);
618 	}
619 
620 	/* Sync socket non-blocking/async state with file flags */
621 	if (fp->f_flag & FNONBLOCK) {
622 		so->so_state |= SS_NBIO;
623 	} else {
624 		so->so_state &= ~SS_NBIO;
625 	}
626 
627 	if (fp->f_flag & FASYNC) {
628 		so->so_state |= SS_ASYNC;
629 		so->so_rcv.sb_flags |= SB_ASYNC;
630 		so->so_snd.sb_flags |= SB_ASYNC;
631 	} else {
632 		so->so_state &= ~SS_ASYNC;
633 		so->so_rcv.sb_flags &= ~SB_ASYNC;
634 		so->so_snd.sb_flags &= ~SB_ASYNC;
635 	}
636 
637 	(void) soacceptlock(so, &sa, 0);
638 	socket_unlock(head, 1);
639 	if (sa == NULL) {
640 		namelen = 0;
641 		if (uap->name) {
642 			goto gotnoname;
643 		}
644 		error = 0;
645 		goto releasefd;
646 	}
647 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
648 
649 	if (uap->name) {
650 		socklen_t       sa_len;
651 
652 		/* save sa_len before it is destroyed */
653 		sa_len = sa->sa_len;
654 		namelen = MIN(namelen, sa_len);
655 		error = copyout(sa, uap->name, namelen);
656 		if (!error) {
657 			/* return the actual, untruncated address length */
658 			namelen = sa_len;
659 		}
660 gotnoname:
661 		error = copyout((caddr_t)&namelen, uap->anamelen,
662 		    sizeof(socklen_t));
663 	}
664 	free_sockaddr(sa);
665 
666 releasefd:
667 	/*
668 	 * If the socket has been marked as inactive by sosetdefunct(),
669 	 * disallow further operations on it.
670 	 */
671 	if (so->so_flags & SOF_DEFUNCT) {
672 		sodefunct(current_proc(), so,
673 		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
674 	}
675 
676 	if (dosocklock) {
677 		socket_unlock(so, 1);
678 	}
679 
680 	proc_fdlock(p);
681 	procfdtbl_releasefd(p, newfd, NULL);
682 	fp_drop(p, newfd, fp, 1);
683 	proc_fdunlock(p);
684 
685 out:
686 	if (error == 0 && ENTR_SHOULDTRACE) {
687 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
688 		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
689 	}
690 
691 	file_drop(fd);
692 	return error;
693 }
694 
695 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)696 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
697 {
698 	__pthread_testcancel(1);
699 	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
700 	           retval);
701 }
702 
703 /*
704  * Returns:	0			Success
705  *		EBADF			Bad file descriptor
706  *		EALREADY		Connection already in progress
707  *		EINPROGRESS		Operation in progress
708  *		ECONNABORTED		Connection aborted
709  *		EINTR			Interrupted function
710  *		EACCES			Mandatory Access Control failure
711  *	file_socket:ENOTSOCK
712  *	file_socket:EBADF
713  *	getsockaddr:ENAMETOOLONG	Filename too long
714  *	getsockaddr:EINVAL		Invalid argument
715  *	getsockaddr:ENOMEM		Not enough space
716  *	getsockaddr:EFAULT		Bad address
717  *	soconnectlock:EOPNOTSUPP
718  *	soconnectlock:EISCONN
719  *	soconnectlock:???		[depends on protocol, filters]
720  *	msleep:EINTR
721  *
722  * Imputed:	so_error		error may be set from so_error, which
723  *					may have been set by soconnectlock.
724  */
725 /* ARGSUSED */
726 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)727 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
728 {
729 	__pthread_testcancel(1);
730 	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
731 	           retval);
732 }
733 
734 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)735 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
736 {
737 #pragma unused(p, retval)
738 	socket_ref_t so;
739 	struct sockaddr_storage ss;
740 	sockaddr_ref_t  sa = NULL;
741 	int error;
742 	int fd = uap->s;
743 	boolean_t dgram;
744 
745 	AUDIT_ARG(fd, uap->s);
746 	error = file_socket(fd, &so);
747 	if (error != 0) {
748 		return error;
749 	}
750 	if (so == NULL) {
751 		error = EBADF;
752 		goto out;
753 	}
754 
755 	/*
756 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
757 	 * if this is a datagram socket; translate for other types.
758 	 */
759 	dgram = (so->so_type == SOCK_DGRAM);
760 
761 	/* Get socket address now before we obtain socket lock */
762 	if (uap->namelen > sizeof(ss)) {
763 		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
764 	} else {
765 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
766 		if (error == 0) {
767 			sa = (sockaddr_ref_t)&ss;
768 		}
769 	}
770 	if (error != 0) {
771 		goto out;
772 	}
773 
774 	error = connectit(so, sa);
775 
776 	if (sa != NULL && sa != SA(&ss)) {
777 		free_sockaddr(sa);
778 	}
779 	if (error == ERESTART) {
780 		error = EINTR;
781 	}
782 out:
783 	file_drop(fd);
784 	return error;
785 }
786 
787 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)788 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
789 {
790 #pragma unused(p, retval)
791 	struct sockaddr_storage ss, sd;
792 	sockaddr_ref_t  src = NULL, dst = NULL;
793 	socket_ref_t so;
794 	int error, error1, fd = uap->socket;
795 	boolean_t dgram;
796 	sae_connid_t cid = SAE_CONNID_ANY;
797 	struct user32_sa_endpoints ep32;
798 	struct user64_sa_endpoints ep64;
799 	struct user_sa_endpoints ep;
800 	user_ssize_t bytes_written = 0;
801 	struct user_iovec *iovp;
802 	uio_t auio = NULL;
803 
804 	AUDIT_ARG(fd, uap->socket);
805 	error = file_socket(fd, &so);
806 	if (error != 0) {
807 		return error;
808 	}
809 	if (so == NULL) {
810 		error = EBADF;
811 		goto out;
812 	}
813 
814 	if (uap->endpoints == USER_ADDR_NULL) {
815 		error = EINVAL;
816 		goto out;
817 	}
818 
819 	if (IS_64BIT_PROCESS(p)) {
820 		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
821 		if (error != 0) {
822 			goto out;
823 		}
824 
825 		ep.sae_srcif = ep64.sae_srcif;
826 		ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
827 		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
828 		ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
829 		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
830 	} else {
831 		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
832 		if (error != 0) {
833 			goto out;
834 		}
835 
836 		ep.sae_srcif = ep32.sae_srcif;
837 		ep.sae_srcaddr = ep32.sae_srcaddr;
838 		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
839 		ep.sae_dstaddr = ep32.sae_dstaddr;
840 		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
841 	}
842 
843 	/*
844 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
845 	 * if this is a datagram socket; translate for other types.
846 	 */
847 	dgram = (so->so_type == SOCK_DGRAM);
848 
849 	/* Get socket address now before we obtain socket lock */
850 	if (ep.sae_srcaddr != USER_ADDR_NULL) {
851 		if (ep.sae_srcaddrlen > sizeof(ss)) {
852 			error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
853 		} else {
854 			error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
855 			if (error == 0) {
856 				src = (sockaddr_ref_t)&ss;
857 			}
858 		}
859 
860 		if (error) {
861 			goto out;
862 		}
863 	}
864 
865 	if (ep.sae_dstaddr == USER_ADDR_NULL) {
866 		error = EINVAL;
867 		goto out;
868 	}
869 
870 	/* Get socket address now before we obtain socket lock */
871 	if (ep.sae_dstaddrlen > sizeof(sd)) {
872 		error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
873 	} else {
874 		error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
875 		if (error == 0) {
876 			dst = (sockaddr_ref_t)&sd;
877 		}
878 	}
879 
880 	if (error) {
881 		goto out;
882 	}
883 
884 	VERIFY(dst != NULL);
885 
886 	if (uap->iov != USER_ADDR_NULL) {
887 		/* Verify range before calling uio_create() */
888 		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
889 			error = EINVAL;
890 			goto out;
891 		}
892 
893 		if (uap->len == USER_ADDR_NULL) {
894 			error = EINVAL;
895 			goto out;
896 		}
897 
898 		/* allocate a uio to hold the number of iovecs passed */
899 		auio = uio_create(uap->iovcnt, 0,
900 		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
901 		    UIO_WRITE);
902 
903 		if (auio == NULL) {
904 			error = ENOMEM;
905 			goto out;
906 		}
907 
908 		/*
909 		 * get location of iovecs within the uio.
910 		 * then copyin the iovecs from user space.
911 		 */
912 		iovp = uio_iovsaddr(auio);
913 		if (iovp == NULL) {
914 			error = ENOMEM;
915 			goto out;
916 		}
917 		error = copyin_user_iovec_array(uap->iov,
918 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
919 		    uap->iovcnt, iovp);
920 		if (error != 0) {
921 			goto out;
922 		}
923 
924 		/* finish setup of uio_t */
925 		error = uio_calculateresid(auio);
926 		if (error != 0) {
927 			goto out;
928 		}
929 	}
930 
931 	error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
932 	    &cid, auio, uap->flags, &bytes_written);
933 	if (error == ERESTART) {
934 		error = EINTR;
935 	}
936 
937 	if (uap->len != USER_ADDR_NULL) {
938 		error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
939 		/* give precedence to connectitx errors */
940 		if ((error1 != 0) && (error == 0)) {
941 			error = error1;
942 		}
943 	}
944 
945 	if (uap->connid != USER_ADDR_NULL) {
946 		error1 = copyout(&cid, uap->connid, sizeof(cid));
947 		/* give precedence to connectitx errors */
948 		if ((error1 != 0) && (error == 0)) {
949 			error = error1;
950 		}
951 	}
952 out:
953 	file_drop(fd);
954 	if (auio != NULL) {
955 		uio_free(auio);
956 	}
957 	if (src != NULL && src != SA(&ss)) {
958 		free_sockaddr(src);
959 	}
960 	if (dst != NULL && dst != SA(&sd)) {
961 		free_sockaddr(dst);
962 	}
963 	return error;
964 }
965 
966 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)967 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
968 {
969 	/*
970 	 * Due to similiarity with a POSIX interface, define as
971 	 * an unofficial cancellation point.
972 	 */
973 	__pthread_testcancel(1);
974 	return connectx_nocancel(p, uap, retval);
975 }
976 
977 static int
connectit(struct socket * so,sockaddr_ref_t sa)978 connectit(struct socket *so, sockaddr_ref_t sa)
979 {
980 	int error;
981 
982 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
983 #if CONFIG_MACF_SOCKET_SUBSET
984 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
985 		return error;
986 	}
987 #endif /* MAC_SOCKET_SUBSET */
988 
989 	socket_lock(so, 1);
990 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
991 		error = EALREADY;
992 		goto out;
993 	}
994 	error = soconnectlock(so, sa, 0);
995 	if (error != 0) {
996 		goto out;
997 	}
998 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
999 		error = EINPROGRESS;
1000 		goto out;
1001 	}
1002 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1003 		lck_mtx_t *mutex_held;
1004 
1005 		if (so->so_proto->pr_getlock != NULL) {
1006 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1007 		} else {
1008 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1009 		}
1010 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1011 		    PSOCK | PCATCH, __func__, 0);
1012 		if (so->so_state & SS_DRAINING) {
1013 			error = ECONNABORTED;
1014 		}
1015 		if (error != 0) {
1016 			break;
1017 		}
1018 	}
1019 	if (error == 0) {
1020 		error = so->so_error;
1021 		so->so_error = 0;
1022 	}
1023 out:
1024 	socket_unlock(so, 1);
1025 	return error;
1026 }
1027 
1028 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1029 connectitx(struct socket *so, sockaddr_ref_t src,
1030     sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1031     sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1032     user_ssize_t *bytes_written)
1033 {
1034 	int error;
1035 
1036 	VERIFY(dst != NULL);
1037 
1038 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1039 #if CONFIG_MACF_SOCKET_SUBSET
1040 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1041 		return error;
1042 	}
1043 
1044 	if (auio != NULL) {
1045 		if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1046 			return error;
1047 		}
1048 	}
1049 #endif /* MAC_SOCKET_SUBSET */
1050 
1051 	socket_lock(so, 1);
1052 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1053 		error = EALREADY;
1054 		goto out;
1055 	}
1056 
1057 	error = soconnectxlocked(so, src, dst, p, ifscope,
1058 	    aid, pcid, flags, NULL, 0, auio, bytes_written);
1059 	if (error != 0) {
1060 		goto out;
1061 	}
1062 	/*
1063 	 * If, after the call to soconnectxlocked the flag is still set (in case
1064 	 * data has been queued and the connect() has actually been triggered,
1065 	 * it will have been unset by the transport), we exit immediately. There
1066 	 * is no reason to wait on any event.
1067 	 */
1068 	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1069 		error = 0;
1070 		goto out;
1071 	}
1072 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1073 		error = EINPROGRESS;
1074 		goto out;
1075 	}
1076 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1077 		lck_mtx_t *mutex_held;
1078 
1079 		if (so->so_proto->pr_getlock != NULL) {
1080 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1081 		} else {
1082 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1083 		}
1084 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1085 		    PSOCK | PCATCH, __func__, 0);
1086 		if (so->so_state & SS_DRAINING) {
1087 			error = ECONNABORTED;
1088 		}
1089 		if (error != 0) {
1090 			break;
1091 		}
1092 	}
1093 	if (error == 0) {
1094 		error = so->so_error;
1095 		so->so_error = 0;
1096 	}
1097 out:
1098 	socket_unlock(so, 1);
1099 	return error;
1100 }
1101 
1102 int
peeloff(proc_ref_t p,struct peeloff_args * uap,int * retval)1103 peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
1104 {
1105 #pragma unused(p, uap, retval)
1106 	/*
1107 	 * Due to similiarity with a POSIX interface, define as
1108 	 * an unofficial cancellation point.
1109 	 */
1110 	__pthread_testcancel(1);
1111 	return 0;
1112 }
1113 
1114 int
disconnectx(proc_ref_t p,struct disconnectx_args * uap,int * retval)1115 disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1116 {
1117 	/*
1118 	 * Due to similiarity with a POSIX interface, define as
1119 	 * an unofficial cancellation point.
1120 	 */
1121 	__pthread_testcancel(1);
1122 	return disconnectx_nocancel(p, uap, retval);
1123 }
1124 
1125 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1126 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1127 {
1128 #pragma unused(p, retval)
1129 	socket_ref_t so;
1130 	int fd = uap->s;
1131 	int error;
1132 
1133 	error = file_socket(fd, &so);
1134 	if (error != 0) {
1135 		return error;
1136 	}
1137 	if (so == NULL) {
1138 		error = EBADF;
1139 		goto out;
1140 	}
1141 
1142 	error = sodisconnectx(so, uap->aid, uap->cid);
1143 out:
1144 	file_drop(fd);
1145 	return error;
1146 }
1147 
1148 /*
1149  * Returns:	0			Success
1150  *	socreate:EAFNOSUPPORT
1151  *	socreate:EPROTOTYPE
1152  *	socreate:EPROTONOSUPPORT
1153  *	socreate:ENOBUFS
1154  *	socreate:ENOMEM
1155  *	socreate:EISCONN
1156  *	socreate:???			[other protocol families, IPSEC]
1157  *	falloc:ENFILE
1158  *	falloc:EMFILE
1159  *	falloc:ENOMEM
1160  *	copyout:EFAULT
1161  *	soconnect2:EINVAL
1162  *	soconnect2:EPROTOTYPE
1163  *	soconnect2:???			[other protocol families[
1164  */
1165 int
socketpair(proc_ref_t p,struct socketpair_args * uap,__unused int32_ref_t retval)1166 socketpair(proc_ref_t p, struct socketpair_args *uap,
1167     __unused int32_ref_t retval)
1168 {
1169 	fileproc_ref_t  fp1, fp2;
1170 	socket_ref_t so1, so2;
1171 	int fd, error, sv[2];
1172 
1173 	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1174 	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1175 	if (error) {
1176 		return error;
1177 	}
1178 	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1179 	if (error) {
1180 		goto free1;
1181 	}
1182 
1183 	error = falloc(p, &fp1, &fd, vfs_context_current());
1184 	if (error) {
1185 		goto free2;
1186 	}
1187 	fp1->f_flag = FREAD | FWRITE;
1188 	fp1->f_ops = &socketops;
1189 	fp_set_data(fp1, so1);
1190 	sv[0] = fd;
1191 
1192 	error = falloc(p, &fp2, &fd, vfs_context_current());
1193 	if (error) {
1194 		goto free3;
1195 	}
1196 	fp2->f_flag = FREAD | FWRITE;
1197 	fp2->f_ops = &socketops;
1198 	fp_set_data(fp2, so2);
1199 	sv[1] = fd;
1200 
1201 	error = soconnect2(so1, so2);
1202 	if (error) {
1203 		goto free4;
1204 	}
1205 	if (uap->type == SOCK_DGRAM) {
1206 		/*
1207 		 * Datagram socket connection is asymmetric.
1208 		 */
1209 		error = soconnect2(so2, so1);
1210 		if (error) {
1211 			goto free4;
1212 		}
1213 	}
1214 
1215 	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1216 		goto free4;
1217 	}
1218 
1219 	proc_fdlock(p);
1220 	procfdtbl_releasefd(p, sv[0], NULL);
1221 	procfdtbl_releasefd(p, sv[1], NULL);
1222 	fp_drop(p, sv[0], fp1, 1);
1223 	fp_drop(p, sv[1], fp2, 1);
1224 	proc_fdunlock(p);
1225 
1226 	return 0;
1227 free4:
1228 	fp_free(p, sv[1], fp2);
1229 free3:
1230 	fp_free(p, sv[0], fp1);
1231 free2:
1232 	(void) soclose(so2);
1233 free1:
1234 	(void) soclose(so1);
1235 	return error;
1236 }
1237 
1238 /*
1239  * Returns:	0			Success
1240  *		EINVAL
1241  *		ENOBUFS
1242  *		EBADF
1243  *		EPIPE
1244  *		EACCES			Mandatory Access Control failure
1245  *	file_socket:ENOTSOCK
1246  *	file_socket:EBADF
1247  *	getsockaddr:ENAMETOOLONG	Filename too long
1248  *	getsockaddr:EINVAL		Invalid argument
1249  *	getsockaddr:ENOMEM		Not enough space
1250  *	getsockaddr:EFAULT		Bad address
1251  *	<pru_sosend>:EACCES[TCP]
1252  *	<pru_sosend>:EADDRINUSE[TCP]
1253  *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1254  *	<pru_sosend>:EAFNOSUPPORT[TCP]
1255  *	<pru_sosend>:EAGAIN[TCP]
1256  *	<pru_sosend>:EBADF
1257  *	<pru_sosend>:ECONNRESET[TCP]
1258  *	<pru_sosend>:EFAULT
1259  *	<pru_sosend>:EHOSTUNREACH[TCP]
1260  *	<pru_sosend>:EINTR
1261  *	<pru_sosend>:EINVAL
1262  *	<pru_sosend>:EISCONN[AF_INET]
1263  *	<pru_sosend>:EMSGSIZE[TCP]
1264  *	<pru_sosend>:ENETDOWN[TCP]
1265  *	<pru_sosend>:ENETUNREACH[TCP]
1266  *	<pru_sosend>:ENOBUFS
1267  *	<pru_sosend>:ENOMEM[TCP]
1268  *	<pru_sosend>:ENOTCONN[AF_INET]
1269  *	<pru_sosend>:EOPNOTSUPP
1270  *	<pru_sosend>:EPERM[TCP]
1271  *	<pru_sosend>:EPIPE
1272  *	<pru_sosend>:EWOULDBLOCK
1273  *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1274  *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1275  *	<pru_sosend>:???		[value from so_error]
1276  *	sockargs:???
1277  */
1278 static int
sendit(proc_ref_t p,struct socket * so,user_msghdr_ref_t mp,uio_t uiop,int flags,int32_ref_t retval)1279 sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
1280     int flags, int32_ref_t retval)
1281 {
1282 	mbuf_ref_t  control = NULL;
1283 	struct sockaddr_storage ss;
1284 	sockaddr_ref_t  to = NULL;
1285 	boolean_t want_free = TRUE;
1286 	int error;
1287 	user_ssize_t len;
1288 
1289 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1290 
1291 	if (mp->msg_name != USER_ADDR_NULL) {
1292 		if (mp->msg_namelen > sizeof(ss)) {
1293 			error = getsockaddr(so, &to, mp->msg_name,
1294 			    mp->msg_namelen, TRUE);
1295 		} else {
1296 			error = getsockaddr_s(so, &ss, mp->msg_name,
1297 			    mp->msg_namelen, TRUE);
1298 			if (error == 0) {
1299 				to = (sockaddr_ref_t)&ss;
1300 				want_free = FALSE;
1301 			}
1302 		}
1303 		if (error != 0) {
1304 			goto out;
1305 		}
1306 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1307 	}
1308 	if (mp->msg_control != USER_ADDR_NULL) {
1309 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1310 			error = EINVAL;
1311 			goto bad;
1312 		}
1313 		error = sockargs(&control, mp->msg_control,
1314 		    mp->msg_controllen, MT_CONTROL);
1315 		if (error != 0) {
1316 			goto bad;
1317 		}
1318 	}
1319 
1320 #if CONFIG_MACF_SOCKET_SUBSET
1321 	/*
1322 	 * We check the state without holding the socket lock;
1323 	 * if a race condition occurs, it would simply result
1324 	 * in an extra call to the MAC check function.
1325 	 */
1326 	if (to != NULL &&
1327 	    !(so->so_state & SS_DEFUNCT) &&
1328 	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1329 		if (control != NULL) {
1330 			m_freem(control);
1331 		}
1332 
1333 		goto bad;
1334 	}
1335 #endif /* MAC_SOCKET_SUBSET */
1336 
1337 	len = uio_resid(uiop);
1338 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1339 	    control, flags);
1340 	if (error != 0) {
1341 		if (uio_resid(uiop) != len && (error == ERESTART ||
1342 		    error == EINTR || error == EWOULDBLOCK)) {
1343 			error = 0;
1344 		}
1345 		/* Generation of SIGPIPE can be controlled per socket */
1346 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1347 		    !(flags & MSG_NOSIGNAL)) {
1348 			psignal(p, SIGPIPE);
1349 		}
1350 	}
1351 	if (error == 0) {
1352 		*retval = (int)(len - uio_resid(uiop));
1353 	}
1354 bad:
1355 	if (want_free) {
1356 		free_sockaddr(to);
1357 	}
1358 out:
1359 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1360 
1361 	return error;
1362 }
1363 
1364 /*
1365  * Returns:	0			Success
1366  *		ENOMEM
1367  *	sendit:???			[see sendit definition in this file]
1368  *	write:???			[4056224: applicable for pipes]
1369  */
1370 int
sendto(proc_ref_t p,struct sendto_args * uap,int32_ref_t retval)1371 sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1372 {
1373 	__pthread_testcancel(1);
1374 	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1375 }
1376 
1377 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1378 sendto_nocancel(proc_ref_t p,
1379     struct sendto_nocancel_args *uap,
1380     int32_ref_t retval)
1381 {
1382 	struct user_msghdr msg;
1383 	int error;
1384 	uio_t auio = NULL;
1385 	socket_ref_t so;
1386 
1387 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1388 	AUDIT_ARG(fd, uap->s);
1389 
1390 	if (uap->flags & MSG_SKIPCFIL) {
1391 		error = EPERM;
1392 		goto done;
1393 	}
1394 
1395 	if (uap->len > LONG_MAX) {
1396 		error = EINVAL;
1397 		goto done;
1398 	}
1399 
1400 	auio = uio_create(1, 0,
1401 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1402 	    UIO_WRITE);
1403 	if (auio == NULL) {
1404 		error = ENOMEM;
1405 		goto done;
1406 	}
1407 	uio_addiov(auio, uap->buf, uap->len);
1408 
1409 	msg.msg_name = uap->to;
1410 	msg.msg_namelen = uap->tolen;
1411 	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1412 	msg.msg_iov = 0;
1413 	msg.msg_iovlen = 0;
1414 	msg.msg_control = 0;
1415 	msg.msg_flags = 0;
1416 
1417 	error = file_socket(uap->s, &so);
1418 	if (error) {
1419 		goto done;
1420 	}
1421 
1422 	if (so == NULL) {
1423 		error = EBADF;
1424 	} else {
1425 		error = sendit(p, so, &msg, auio, uap->flags, retval);
1426 	}
1427 
1428 	file_drop(uap->s);
1429 done:
1430 	if (auio != NULL) {
1431 		uio_free(auio);
1432 	}
1433 
1434 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1435 
1436 	return error;
1437 }
1438 
1439 /*
1440  * Returns:	0			Success
1441  *		ENOBUFS
1442  *	copyin:EFAULT
1443  *	sendit:???			[see sendit definition in this file]
1444  */
1445 int
sendmsg(proc_ref_t p,struct sendmsg_args * uap,int32_ref_t retval)1446 sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1447 {
1448 	__pthread_testcancel(1);
1449 	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1450 	           retval);
1451 }
1452 
1453 int
sendmsg_nocancel(proc_ref_t p,struct sendmsg_nocancel_args * uap,int32_ref_t retval)1454 sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1455     int32_ref_t retval)
1456 {
1457 	struct user32_msghdr msg32;
1458 	struct user64_msghdr msg64;
1459 	struct user_msghdr user_msg;
1460 	caddr_t msghdrp;
1461 	int     size_of_msghdr;
1462 	int error;
1463 	uio_t auio = NULL;
1464 	struct user_iovec *iovp;
1465 	socket_ref_t so;
1466 
1467 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1468 
1469 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1470 	AUDIT_ARG(fd, uap->s);
1471 
1472 	if (uap->flags & MSG_SKIPCFIL) {
1473 		error = EPERM;
1474 		goto done;
1475 	}
1476 
1477 	if (is_p_64bit_process) {
1478 		msghdrp = (caddr_t)&msg64;
1479 		size_of_msghdr = sizeof(msg64);
1480 	} else {
1481 		msghdrp = (caddr_t)&msg32;
1482 		size_of_msghdr = sizeof(msg32);
1483 	}
1484 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1485 	if (error) {
1486 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1487 		return error;
1488 	}
1489 
1490 	if (is_p_64bit_process) {
1491 		user_msg.msg_flags = msg64.msg_flags;
1492 		user_msg.msg_controllen = msg64.msg_controllen;
1493 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
1494 		user_msg.msg_iovlen = msg64.msg_iovlen;
1495 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1496 		user_msg.msg_namelen = msg64.msg_namelen;
1497 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
1498 	} else {
1499 		user_msg.msg_flags = msg32.msg_flags;
1500 		user_msg.msg_controllen = msg32.msg_controllen;
1501 		user_msg.msg_control = msg32.msg_control;
1502 		user_msg.msg_iovlen = msg32.msg_iovlen;
1503 		user_msg.msg_iov = msg32.msg_iov;
1504 		user_msg.msg_namelen = msg32.msg_namelen;
1505 		user_msg.msg_name = msg32.msg_name;
1506 	}
1507 
1508 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1509 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1510 		    0, 0, 0, 0);
1511 		return EMSGSIZE;
1512 	}
1513 
1514 	/* allocate a uio large enough to hold the number of iovecs passed */
1515 	auio = uio_create(user_msg.msg_iovlen, 0,
1516 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1517 	    UIO_WRITE);
1518 	if (auio == NULL) {
1519 		error = ENOBUFS;
1520 		goto done;
1521 	}
1522 
1523 	if (user_msg.msg_iovlen) {
1524 		/*
1525 		 * get location of iovecs within the uio.
1526 		 * then copyin the iovecs from user space.
1527 		 */
1528 		iovp = uio_iovsaddr(auio);
1529 		if (iovp == NULL) {
1530 			error = ENOBUFS;
1531 			goto done;
1532 		}
1533 		error = copyin_user_iovec_array(user_msg.msg_iov,
1534 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1535 		    user_msg.msg_iovlen, iovp);
1536 		if (error) {
1537 			goto done;
1538 		}
1539 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1540 
1541 		/* finish setup of uio_t */
1542 		error = uio_calculateresid(auio);
1543 		if (error) {
1544 			goto done;
1545 		}
1546 	} else {
1547 		user_msg.msg_iov = 0;
1548 	}
1549 
1550 	/* msg_flags is ignored for send */
1551 	user_msg.msg_flags = 0;
1552 
1553 	error = file_socket(uap->s, &so);
1554 	if (error) {
1555 		goto done;
1556 	}
1557 	if (so == NULL) {
1558 		error = EBADF;
1559 	} else {
1560 		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1561 	}
1562 	file_drop(uap->s);
1563 done:
1564 	if (auio != NULL) {
1565 		uio_free(auio);
1566 	}
1567 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1568 
1569 	return error;
1570 }
1571 
1572 int
sendmsg_x(proc_ref_t p,struct sendmsg_x_args * uap,user_ssize_t * retval)1573 sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1574 {
1575 	int error = 0;
1576 	user_msghdr_x_ptr_t user_msg_x = NULL;
1577 	uio_ref_ptr_t uiop = NULL;
1578 	socket_ref_t so;
1579 	u_int i;
1580 	sockaddr_ref_t to = NULL;
1581 	user_ssize_t len_before = 0, len_after;
1582 	int need_drop = 0;
1583 	size_t size_of_msghdr;
1584 	void_ptr_t umsgp = NULL;
1585 	u_int uiocnt = 0;
1586 	int has_addr_or_ctl = 0;
1587 
1588 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1589 
1590 	size_of_msghdr = IS_64BIT_PROCESS(p) ?
1591 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1592 
1593 	if (uap->flags & MSG_SKIPCFIL) {
1594 		error = EPERM;
1595 		goto out;
1596 	}
1597 
1598 	error = file_socket(uap->s, &so);
1599 	if (error) {
1600 		goto out;
1601 	}
1602 	need_drop = 1;
1603 	if (so == NULL) {
1604 		error = EBADF;
1605 		goto out;
1606 	}
1607 
1608 	/*
1609 	 * Input parameter range check
1610 	 */
1611 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1612 		error = EINVAL;
1613 		goto out;
1614 	}
1615 	/*
1616 	 * Clip to max currently allowed
1617 	 */
1618 	if (uap->cnt > somaxsendmsgx) {
1619 		uap->cnt = somaxsendmsgx;
1620 	}
1621 
1622 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
1623 	    Z_WAITOK | Z_ZERO);
1624 	if (user_msg_x == NULL) {
1625 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
1626 		error = ENOMEM;
1627 		goto out;
1628 	}
1629 	uiop = kalloc_type(uio_ref_t, uap->cnt, Z_WAITOK | Z_ZERO);
1630 	if (uiop == NULL) {
1631 		DBG_PRINTF("%s uiop alloc failed\n", __func__);
1632 		error = ENOMEM;
1633 		goto out;
1634 	}
1635 
1636 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
1637 	if (umsgp == NULL) {
1638 		printf("%s user_msg_x alloc failed\n", __func__);
1639 		error = ENOMEM;
1640 		goto out;
1641 	}
1642 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1643 	if (error) {
1644 		DBG_PRINTF("%s copyin() failed\n", __func__);
1645 		goto out;
1646 	}
1647 	error = internalize_user_msghdr_array(umsgp,
1648 	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1649 	    UIO_WRITE, uap->cnt, user_msg_x, uiop);
1650 	if (error) {
1651 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1652 		goto out;
1653 	}
1654 	/*
1655 	 * Make sure the size of each message iovec and
1656 	 * the aggregate size of all the iovec is valid
1657 	 */
1658 	if (uio_array_is_valid(uiop, uap->cnt) == false) {
1659 		error = EINVAL;
1660 		goto out;
1661 	}
1662 
1663 	/*
1664 	 * Sanity check on passed arguments
1665 	 */
1666 	for (i = 0; i < uap->cnt; i++) {
1667 		struct user_msghdr_x *mp = user_msg_x + i;
1668 
1669 		/*
1670 		 * No flags on send message
1671 		 */
1672 		if (mp->msg_flags != 0) {
1673 			error = EINVAL;
1674 			goto out;
1675 		}
1676 		/*
1677 		 * No support for address or ancillary data (yet)
1678 		 */
1679 		if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1680 			has_addr_or_ctl = 1;
1681 		}
1682 
1683 		if (mp->msg_control != USER_ADDR_NULL ||
1684 		    mp->msg_controllen != 0) {
1685 			has_addr_or_ctl = 1;
1686 		}
1687 
1688 #if CONFIG_MACF_SOCKET_SUBSET
1689 		/*
1690 		 * We check the state without holding the socket lock;
1691 		 * if a race condition occurs, it would simply result
1692 		 * in an extra call to the MAC check function.
1693 		 *
1694 		 * Note: The following check is never true taken with the
1695 		 * current limitation that we do not accept to pass an address,
1696 		 * this is effectively placeholder code. If we add support for
1697 		 * addresses, we will have to check every address.
1698 		 */
1699 		if (to != NULL &&
1700 		    !(so->so_state & SS_DEFUNCT) &&
1701 		    (error = mac_socket_check_send(kauth_cred_get(), so, to))
1702 		    != 0) {
1703 			goto out;
1704 		}
1705 #endif /* MAC_SOCKET_SUBSET */
1706 	}
1707 
1708 	len_before = uio_array_resid(uiop, uap->cnt);
1709 
1710 	/*
1711 	 * Feed list of packets at once only for connected socket without
1712 	 * control message
1713 	 */
1714 	if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1715 	    pru_sosend_list_notsupp &&
1716 	    has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1717 		error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1718 		    uap->cnt, uap->flags);
1719 	} else {
1720 		for (i = 0; i < uap->cnt; i++) {
1721 			struct user_msghdr_x *mp = user_msg_x + i;
1722 			struct user_msghdr user_msg;
1723 			uio_t auio = uiop[i];
1724 			int32_t tmpval;
1725 
1726 			user_msg.msg_flags = mp->msg_flags;
1727 			user_msg.msg_controllen = mp->msg_controllen;
1728 			user_msg.msg_control = mp->msg_control;
1729 			user_msg.msg_iovlen = mp->msg_iovlen;
1730 			user_msg.msg_iov = mp->msg_iov;
1731 			user_msg.msg_namelen = mp->msg_namelen;
1732 			user_msg.msg_name = mp->msg_name;
1733 
1734 			error = sendit(p, so, &user_msg, auio, uap->flags,
1735 			    &tmpval);
1736 			if (error != 0) {
1737 				break;
1738 			}
1739 			uiocnt += 1;
1740 		}
1741 	}
1742 	len_after = uio_array_resid(uiop, uap->cnt);
1743 
1744 	VERIFY(len_after <= len_before);
1745 
1746 	if (error != 0) {
1747 		if (len_after != len_before && (error == ERESTART ||
1748 		    error == EINTR || error == EWOULDBLOCK ||
1749 		    error == ENOBUFS)) {
1750 			error = 0;
1751 		}
1752 		/* Generation of SIGPIPE can be controlled per socket */
1753 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1754 		    !(uap->flags & MSG_NOSIGNAL)) {
1755 			psignal(p, SIGPIPE);
1756 		}
1757 	}
1758 	if (error == 0) {
1759 		externalize_user_msghdr_array(umsgp,
1760 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1761 		    UIO_WRITE, uiocnt, user_msg_x, uiop);
1762 
1763 		*retval = (int)(uiocnt);
1764 	}
1765 out:
1766 	if (need_drop) {
1767 		file_drop(uap->s);
1768 	}
1769 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
1770 	if (uiop != NULL) {
1771 		free_uio_array(uiop, uap->cnt);
1772 		kfree_type(uio_ref_t, uap->cnt, uiop);
1773 	}
1774 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
1775 
1776 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1777 
1778 	return error;
1779 }
1780 
1781 
1782 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)1783 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
1784 {
1785 	int error = 0;
1786 	socklen_t sa_len = 0;
1787 	ssize_t len;
1788 
1789 	len = *namelen;
1790 	if (len <= 0 || fromsa == 0) {
1791 		len = 0;
1792 	} else {
1793 #ifndef MIN
1794 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1795 #endif
1796 		sa_len = fromsa->sa_len;
1797 		len = MIN((unsigned int)len, sa_len);
1798 		error = copyout(fromsa, name, (unsigned)len);
1799 		if (error) {
1800 			goto out;
1801 		}
1802 	}
1803 	*namelen = sa_len;
1804 out:
1805 	return 0;
1806 }
1807 
1808 static int
copyout_control(proc_ref_t p,mbuf_ref_t m,user_addr_t control,socklen_ref_t controllen,int_ref_t flags,socket_ref_t so)1809 copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
1810     socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
1811 {
1812 	int error = 0;
1813 	socklen_t len;
1814 	user_addr_t ctlbuf;
1815 	struct inpcb *inp = NULL;
1816 	bool want_pktinfo = false;
1817 	bool seen_pktinfo = false;
1818 
1819 	if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
1820 		inp = sotoinpcb(so);
1821 		want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
1822 	}
1823 
1824 	len = *controllen;
1825 	*controllen = 0;
1826 	ctlbuf = control;
1827 
1828 	while (m && len > 0) {
1829 		socklen_t tocopy;
1830 		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1831 		socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
1832 		socklen_t buflen = m->m_len;
1833 
1834 		while (buflen > 0 && len > 0) {
1835 			/*
1836 			 * SCM_TIMESTAMP hack because  struct timeval has a
1837 			 * different size for 32 bits and 64 bits processes
1838 			 */
1839 			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1840 				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1841 				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1842 				socklen_t tmp_space;
1843 				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1844 
1845 				tmp_cp->cmsg_level = SOL_SOCKET;
1846 				tmp_cp->cmsg_type = SCM_TIMESTAMP;
1847 
1848 				if (proc_is64bit(p)) {
1849 					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1850 
1851 					os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1852 					os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1853 
1854 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1855 					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1856 				} else {
1857 					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1858 
1859 					tv32->tv_sec = (user32_time_t)tv->tv_sec;
1860 					tv32->tv_usec = tv->tv_usec;
1861 
1862 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1863 					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1864 				}
1865 				if (len >= tmp_space) {
1866 					tocopy = tmp_space;
1867 				} else {
1868 					*flags |= MSG_CTRUNC;
1869 					tocopy = len;
1870 				}
1871 				error = copyout(tmp_buffer, ctlbuf, tocopy);
1872 				if (error) {
1873 					goto out;
1874 				}
1875 			} else {
1876 				/* If socket has flow tracking and socket did not request address, ignore it */
1877 				if (SOFLOW_ENABLED(so) &&
1878 				    ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
1879 				    !(inp->inp_flags & INP_RECVDSTADDR)) ||
1880 				    (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
1881 				    !(inp->inp_flags & IN6P_PKTINFO)))) {
1882 					tocopy = 0;
1883 				} else {
1884 					if (cp_size > buflen) {
1885 						panic("cp_size > buflen, something"
1886 						    "wrong with alignment!");
1887 					}
1888 					if (len >= cp_size) {
1889 						tocopy = cp_size;
1890 					} else {
1891 						*flags |= MSG_CTRUNC;
1892 						tocopy = len;
1893 					}
1894 					error = copyout((caddr_t) cp, ctlbuf, tocopy);
1895 					if (error) {
1896 						goto out;
1897 					}
1898 					if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
1899 					    (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
1900 						seen_pktinfo = true;
1901 					}
1902 				}
1903 			}
1904 
1905 			ctlbuf += tocopy;
1906 			len -= tocopy;
1907 
1908 			buflen -= cp_size;
1909 			cp = (struct cmsghdr *)(void *)
1910 			    ((unsigned char *) cp + cp_size);
1911 			cp_size = CMSG_ALIGN(cp->cmsg_len);
1912 		}
1913 
1914 		m = m->m_next;
1915 	}
1916 	*controllen = (socklen_t)(ctlbuf - control);
1917 out:
1918 	if (want_pktinfo && !seen_pktinfo) {
1919 		missingpktinfo += 1;
1920 #if (DEBUG || DEVELOPMENT)
1921 		char pname[MAXCOMLEN];
1922 		char local[MAX_IPv6_STR_LEN + 6];
1923 		char remote[MAX_IPv6_STR_LEN + 6];
1924 
1925 		proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
1926 		if (inp->inp_vflag & INP_IPV6) {
1927 			inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
1928 			inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
1929 		} else {
1930 			inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
1931 			inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
1932 		}
1933 
1934 		os_log(OS_LOG_DEFAULT,
1935 		    "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
1936 		    local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
1937 		    pname, so->last_pid, error);
1938 #endif /* (DEBUG || DEVELOPMENT) */
1939 	}
1940 	return error;
1941 }
1942 
1943 /*
1944  * Returns:	0			Success
1945  *		ENOTSOCK
1946  *		EINVAL
1947  *		EBADF
1948  *		EACCES			Mandatory Access Control failure
1949  *	copyout:EFAULT
1950  *	fp_lookup:EBADF
1951  *	<pru_soreceive>:ENOBUFS
1952  *	<pru_soreceive>:ENOTCONN
1953  *	<pru_soreceive>:EWOULDBLOCK
1954  *	<pru_soreceive>:EFAULT
1955  *	<pru_soreceive>:EINTR
1956  *	<pru_soreceive>:EBADF
1957  *	<pru_soreceive>:EINVAL
1958  *	<pru_soreceive>:EMSGSIZE
1959  *	<pru_soreceive>:???
1960  *
1961  * Notes:	Additional return values from calls through <pru_soreceive>
1962  *		depend on protocols other than TCP or AF_UNIX, which are
1963  *		documented above.
1964  */
1965 static int
recvit(proc_ref_t p,int s,user_msghdr_ref_t mp,uio_t uiop,user_addr_t namelenp,int32_ref_t retval)1966 recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
1967     user_addr_t namelenp, int32_ref_t retval)
1968 {
1969 	ssize_t len;
1970 	int error;
1971 	mbuf_ref_t  control = 0;
1972 	socket_ref_t so;
1973 	sockaddr_ref_t  fromsa = 0;
1974 	fileproc_ref_t  fp;
1975 
1976 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1977 	if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
1978 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1979 		return error;
1980 	}
1981 	so = (struct socket *)fp_get_data(fp);
1982 
1983 #if CONFIG_MACF_SOCKET_SUBSET
1984 	/*
1985 	 * We check the state without holding the socket lock;
1986 	 * if a race condition occurs, it would simply result
1987 	 * in an extra call to the MAC check function.
1988 	 */
1989 	if (!(so->so_state & SS_DEFUNCT) &&
1990 	    !(so->so_state & SS_ISCONNECTED) &&
1991 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1992 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1993 		goto out1;
1994 	}
1995 #endif /* MAC_SOCKET_SUBSET */
1996 	if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
1997 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1998 		error = EINVAL;
1999 		goto out1;
2000 	}
2001 
2002 	len = uio_resid(uiop);
2003 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2004 	    NULL, mp->msg_control ? &control : NULL,
2005 	    &mp->msg_flags);
2006 	if (fromsa) {
2007 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2008 		    fromsa);
2009 	}
2010 	if (error) {
2011 		if (uio_resid(uiop) != len && (error == ERESTART ||
2012 		    error == EINTR || error == EWOULDBLOCK)) {
2013 			error = 0;
2014 		}
2015 	}
2016 	if (error) {
2017 		goto out;
2018 	}
2019 
2020 	*retval = (int32_t)(len - uio_resid(uiop));
2021 
2022 	if (mp->msg_name) {
2023 		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
2024 		if (error) {
2025 			goto out;
2026 		}
2027 		/* return the actual, untruncated address length */
2028 		if (namelenp &&
2029 		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2030 		    sizeof(int)))) {
2031 			goto out;
2032 		}
2033 	}
2034 
2035 	if (mp->msg_control) {
2036 		error = copyout_control(p, control, mp->msg_control,
2037 		    &mp->msg_controllen, &mp->msg_flags, so);
2038 	}
2039 out:
2040 	free_sockaddr(fromsa);
2041 	if (control) {
2042 		m_freem(control);
2043 	}
2044 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2045 out1:
2046 	fp_drop(p, s, fp, 0);
2047 	return error;
2048 }
2049 
2050 /*
2051  * Returns:	0			Success
2052  *		ENOMEM
2053  *	copyin:EFAULT
2054  *	recvit:???
2055  *	read:???			[4056224: applicable for pipes]
2056  *
2057  * Notes:	The read entry point is only called as part of support for
2058  *		binary backward compatibility; new code should use read
2059  *		instead of recv or recvfrom when attempting to read data
2060  *		from pipes.
2061  *
2062  *		For full documentation of the return codes from recvit, see
2063  *		the block header for the recvit function.
2064  */
2065 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2066 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2067 {
2068 	__pthread_testcancel(1);
2069 	return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2070 	           retval);
2071 }
2072 
2073 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2074 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2075     int32_ref_t retval)
2076 {
2077 	struct user_msghdr msg;
2078 	int error;
2079 	uio_t auio = NULL;
2080 
2081 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2082 	AUDIT_ARG(fd, uap->s);
2083 
2084 	if (uap->fromlenaddr) {
2085 		error = copyin(uap->fromlenaddr,
2086 		    (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2087 		if (error) {
2088 			return error;
2089 		}
2090 	} else {
2091 		msg.msg_namelen = 0;
2092 	}
2093 	msg.msg_name = uap->from;
2094 	auio = uio_create(1, 0,
2095 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2096 	    UIO_READ);
2097 	if (auio == NULL) {
2098 		return ENOMEM;
2099 	}
2100 
2101 	uio_addiov(auio, uap->buf, uap->len);
2102 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
2103 	msg.msg_iov = 0;
2104 	msg.msg_iovlen = 0;
2105 	msg.msg_control = 0;
2106 	msg.msg_controllen = 0;
2107 	msg.msg_flags = uap->flags;
2108 	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2109 	if (auio != NULL) {
2110 		uio_free(auio);
2111 	}
2112 
2113 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2114 
2115 	return error;
2116 }
2117 
2118 /*
2119  * Returns:	0			Success
2120  *		EMSGSIZE
2121  *		ENOMEM
2122  *	copyin:EFAULT
2123  *	copyout:EFAULT
2124  *	recvit:???
2125  *
2126  * Notes:	For full documentation of the return codes from recvit, see
2127  *		the block header for the recvit function.
2128  */
2129 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2130 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2131 {
2132 	__pthread_testcancel(1);
2133 	return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2134 	           retval);
2135 }
2136 
2137 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2138 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2139     int32_ref_t retval)
2140 {
2141 	struct user32_msghdr msg32;
2142 	struct user64_msghdr msg64;
2143 	struct user_msghdr user_msg;
2144 	caddr_t msghdrp;
2145 	int     size_of_msghdr;
2146 	user_addr_t uiov;
2147 	int error;
2148 	uio_t auio = NULL;
2149 	struct user_iovec *iovp;
2150 
2151 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2152 
2153 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2154 	AUDIT_ARG(fd, uap->s);
2155 	if (is_p_64bit_process) {
2156 		msghdrp = (caddr_t)&msg64;
2157 		size_of_msghdr = sizeof(msg64);
2158 	} else {
2159 		msghdrp = (caddr_t)&msg32;
2160 		size_of_msghdr = sizeof(msg32);
2161 	}
2162 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
2163 	if (error) {
2164 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2165 		return error;
2166 	}
2167 
2168 	/* only need to copy if user process is not 64-bit */
2169 	if (is_p_64bit_process) {
2170 		user_msg.msg_flags = msg64.msg_flags;
2171 		user_msg.msg_controllen = msg64.msg_controllen;
2172 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
2173 		user_msg.msg_iovlen = msg64.msg_iovlen;
2174 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2175 		user_msg.msg_namelen = msg64.msg_namelen;
2176 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
2177 	} else {
2178 		user_msg.msg_flags = msg32.msg_flags;
2179 		user_msg.msg_controllen = msg32.msg_controllen;
2180 		user_msg.msg_control = msg32.msg_control;
2181 		user_msg.msg_iovlen = msg32.msg_iovlen;
2182 		user_msg.msg_iov = msg32.msg_iov;
2183 		user_msg.msg_namelen = msg32.msg_namelen;
2184 		user_msg.msg_name = msg32.msg_name;
2185 	}
2186 
2187 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2188 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2189 		    0, 0, 0, 0);
2190 		return EMSGSIZE;
2191 	}
2192 
2193 	user_msg.msg_flags = uap->flags;
2194 
2195 	/* allocate a uio large enough to hold the number of iovecs passed */
2196 	auio = uio_create(user_msg.msg_iovlen, 0,
2197 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2198 	    UIO_READ);
2199 	if (auio == NULL) {
2200 		error = ENOMEM;
2201 		goto done;
2202 	}
2203 
2204 	/*
2205 	 * get location of iovecs within the uio.  then copyin the iovecs from
2206 	 * user space.
2207 	 */
2208 	iovp = uio_iovsaddr(auio);
2209 	if (iovp == NULL) {
2210 		error = ENOMEM;
2211 		goto done;
2212 	}
2213 	uiov = user_msg.msg_iov;
2214 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2215 	error = copyin_user_iovec_array(uiov,
2216 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2217 	    user_msg.msg_iovlen, iovp);
2218 	if (error) {
2219 		goto done;
2220 	}
2221 
2222 	/* finish setup of uio_t */
2223 	error = uio_calculateresid(auio);
2224 	if (error) {
2225 		goto done;
2226 	}
2227 
2228 	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2229 	if (!error) {
2230 		user_msg.msg_iov = uiov;
2231 		if (is_p_64bit_process) {
2232 			msg64.msg_flags = user_msg.msg_flags;
2233 			msg64.msg_controllen = user_msg.msg_controllen;
2234 			msg64.msg_control = user_msg.msg_control;
2235 			msg64.msg_iovlen = user_msg.msg_iovlen;
2236 			msg64.msg_iov = user_msg.msg_iov;
2237 			msg64.msg_namelen = user_msg.msg_namelen;
2238 			msg64.msg_name = user_msg.msg_name;
2239 		} else {
2240 			msg32.msg_flags = user_msg.msg_flags;
2241 			msg32.msg_controllen = user_msg.msg_controllen;
2242 			msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2243 			msg32.msg_iovlen = user_msg.msg_iovlen;
2244 			msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2245 			msg32.msg_namelen = user_msg.msg_namelen;
2246 			msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2247 		}
2248 		error = copyout(msghdrp, uap->msg, size_of_msghdr);
2249 	}
2250 done:
2251 	if (auio != NULL) {
2252 		uio_free(auio);
2253 	}
2254 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2255 	return error;
2256 }
2257 
2258 int
recvmsg_x(proc_ref_t p,struct recvmsg_x_args * uap,user_ssize_t * retval)2259 recvmsg_x(proc_ref_t p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2260 {
2261 	int error = EOPNOTSUPP;
2262 	user_msghdr_x_ptr_t user_msg_x = NULL;
2263 	recv_msg_elem_ptr_t recv_msg_array = NULL;
2264 	socket_ref_t so;
2265 	user_ssize_t len_before = 0, len_after;
2266 	int need_drop = 0;
2267 	size_t size_of_msghdr;
2268 	void_ptr_t umsgp = NULL;
2269 	u_int i;
2270 	u_int uiocnt;
2271 
2272 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2273 
2274 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2275 
2276 	size_of_msghdr = is_p_64bit_process ?
2277 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2278 
2279 	error = file_socket(uap->s, &so);
2280 	if (error) {
2281 		goto out;
2282 	}
2283 	need_drop = 1;
2284 	if (so == NULL) {
2285 		error = EBADF;
2286 		goto out;
2287 	}
2288 	/*
2289 	 * Support only a subset of message flags
2290 	 */
2291 	if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |  MSG_NBIO)) {
2292 		return EOPNOTSUPP;
2293 	}
2294 	/*
2295 	 * Input parameter range check
2296 	 */
2297 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2298 		error = EINVAL;
2299 		goto out;
2300 	}
2301 	if (uap->cnt > somaxrecvmsgx) {
2302 		uap->cnt = somaxrecvmsgx;
2303 	}
2304 
2305 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
2306 	    Z_WAITOK | Z_ZERO);
2307 	if (user_msg_x == NULL) {
2308 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
2309 		error = ENOMEM;
2310 		goto out;
2311 	}
2312 	recv_msg_array = alloc_recv_msg_array(uap->cnt);
2313 	if (recv_msg_array == NULL) {
2314 		DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2315 		error = ENOMEM;
2316 		goto out;
2317 	}
2318 
2319 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2320 	if (umsgp == NULL) {
2321 		DBG_PRINTF("%s umsgp alloc failed\n", __func__);
2322 		error = ENOMEM;
2323 		goto out;
2324 	}
2325 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2326 	if (error) {
2327 		DBG_PRINTF("%s copyin() failed\n", __func__);
2328 		goto out;
2329 	}
2330 	error = internalize_recv_msghdr_array(umsgp,
2331 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2332 	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2333 	if (error) {
2334 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2335 		goto out;
2336 	}
2337 	/*
2338 	 * Make sure the size of each message iovec and
2339 	 * the aggregate size of all the iovec is valid
2340 	 */
2341 	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2342 		error = EINVAL;
2343 		goto out;
2344 	}
2345 	/*
2346 	 * Sanity check on passed arguments
2347 	 */
2348 	for (i = 0; i < uap->cnt; i++) {
2349 		struct user_msghdr_x *mp = user_msg_x + i;
2350 
2351 		if (mp->msg_flags != 0) {
2352 			error = EINVAL;
2353 			goto out;
2354 		}
2355 	}
2356 #if CONFIG_MACF_SOCKET_SUBSET
2357 	/*
2358 	 * We check the state without holding the socket lock;
2359 	 * if a race condition occurs, it would simply result
2360 	 * in an extra call to the MAC check function.
2361 	 */
2362 	if (!(so->so_state & SS_DEFUNCT) &&
2363 	    !(so->so_state & SS_ISCONNECTED) &&
2364 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2365 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2366 		goto out;
2367 	}
2368 #endif /* MAC_SOCKET_SUBSET */
2369 
2370 	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2371 
2372 	if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2373 	    pru_soreceive_list_notsupp &&
2374 	    somaxrecvmsgx == 0) {
2375 		error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2376 		    recv_msg_array, uap->cnt, &uap->flags);
2377 	} else {
2378 		int flags = uap->flags;
2379 
2380 		for (i = 0; i < uap->cnt; i++) {
2381 			struct recv_msg_elem *recv_msg_elem;
2382 			uio_t auio;
2383 			sockaddr_ref_ref_t psa;
2384 			struct mbuf **controlp;
2385 
2386 			recv_msg_elem = recv_msg_array + i;
2387 			auio = recv_msg_elem->uio;
2388 
2389 			/*
2390 			 * Do not block if we got at least one packet
2391 			 */
2392 			if (i > 0) {
2393 				flags |= MSG_DONTWAIT;
2394 			}
2395 
2396 			psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2397 			    &recv_msg_elem->psa : NULL;
2398 			controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2399 			    &recv_msg_elem->controlp : NULL;
2400 
2401 			error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2402 			    auio, NULL, controlp, &flags);
2403 			if (error) {
2404 				break;
2405 			}
2406 			/*
2407 			 * We have some data
2408 			 */
2409 			recv_msg_elem->which |= SOCK_MSG_DATA;
2410 			/*
2411 			 * Set the messages flags for this packet
2412 			 */
2413 			flags &= ~MSG_DONTWAIT;
2414 			recv_msg_elem->flags = flags;
2415 			/*
2416 			 * Stop on partial copy
2417 			 */
2418 			if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2419 				break;
2420 			}
2421 		}
2422 	}
2423 
2424 	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2425 
2426 	if (error) {
2427 		if (len_after != len_before && (error == ERESTART ||
2428 		    error == EINTR || error == EWOULDBLOCK)) {
2429 			error = 0;
2430 		} else {
2431 			goto out;
2432 		}
2433 	}
2434 
2435 	uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2436 	    uap->cnt, user_msg_x, recv_msg_array, &error);
2437 	if (error != 0) {
2438 		goto out;
2439 	}
2440 
2441 	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2442 	if (error) {
2443 		DBG_PRINTF("%s copyout() failed\n", __func__);
2444 		goto out;
2445 	}
2446 	*retval = (int)(uiocnt);
2447 
2448 out:
2449 	if (need_drop) {
2450 		file_drop(uap->s);
2451 	}
2452 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
2453 	free_recv_msg_array(recv_msg_array, uap->cnt);
2454 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
2455 
2456 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2457 
2458 	return error;
2459 }
2460 
2461 /*
2462  * Returns:	0			Success
2463  *		EBADF
2464  *	file_socket:ENOTSOCK
2465  *	file_socket:EBADF
2466  *	soshutdown:EINVAL
2467  *	soshutdown:ENOTCONN
2468  *	soshutdown:EADDRNOTAVAIL[TCP]
2469  *	soshutdown:ENOBUFS[TCP]
2470  *	soshutdown:EMSGSIZE[TCP]
2471  *	soshutdown:EHOSTUNREACH[TCP]
2472  *	soshutdown:ENETUNREACH[TCP]
2473  *	soshutdown:ENETDOWN[TCP]
2474  *	soshutdown:ENOMEM[TCP]
2475  *	soshutdown:EACCES[TCP]
2476  *	soshutdown:EMSGSIZE[TCP]
2477  *	soshutdown:ENOBUFS[TCP]
2478  *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
2479  *	soshutdown:???			[other protocol families]
2480  */
2481 /* ARGSUSED */
2482 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)2483 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
2484     __unused int32_ref_t retval)
2485 {
2486 	socket_ref_t so;
2487 	int error;
2488 
2489 	AUDIT_ARG(fd, uap->s);
2490 	error = file_socket(uap->s, &so);
2491 	if (error) {
2492 		return error;
2493 	}
2494 	if (so == NULL) {
2495 		error = EBADF;
2496 		goto out;
2497 	}
2498 	error =  soshutdown((struct socket *)so, uap->how);
2499 out:
2500 	file_drop(uap->s);
2501 	return error;
2502 }
2503 
2504 /*
2505  * Returns:	0			Success
2506  *		EFAULT
2507  *		EINVAL
2508  *		EACCES			Mandatory Access Control failure
2509  *	file_socket:ENOTSOCK
2510  *	file_socket:EBADF
2511  *	sosetopt:EINVAL
2512  *	sosetopt:ENOPROTOOPT
2513  *	sosetopt:ENOBUFS
2514  *	sosetopt:EDOM
2515  *	sosetopt:EFAULT
2516  *	sosetopt:EOPNOTSUPP[AF_UNIX]
2517  *	sosetopt:???
2518  */
2519 /* ARGSUSED */
2520 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)2521 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
2522     __unused int32_ref_t retval)
2523 {
2524 	socket_ref_t so;
2525 	struct sockopt sopt;
2526 	int error;
2527 
2528 	AUDIT_ARG(fd, uap->s);
2529 	if (uap->val == 0 && uap->valsize != 0) {
2530 		return EFAULT;
2531 	}
2532 	/* No bounds checking on size (it's unsigned) */
2533 
2534 	error = file_socket(uap->s, &so);
2535 	if (error) {
2536 		return error;
2537 	}
2538 
2539 	sopt.sopt_dir = SOPT_SET;
2540 	sopt.sopt_level = uap->level;
2541 	sopt.sopt_name = uap->name;
2542 	sopt.sopt_val = uap->val;
2543 	sopt.sopt_valsize = uap->valsize;
2544 	sopt.sopt_p = p;
2545 
2546 	if (so == NULL) {
2547 		error = EINVAL;
2548 		goto out;
2549 	}
2550 #if CONFIG_MACF_SOCKET_SUBSET
2551 	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2552 	    &sopt)) != 0) {
2553 		goto out;
2554 	}
2555 #endif /* MAC_SOCKET_SUBSET */
2556 	error = sosetoptlock(so, &sopt, 1);     /* will lock socket */
2557 out:
2558 	file_drop(uap->s);
2559 	return error;
2560 }
2561 
2562 
2563 
2564 /*
2565  * Returns:	0			Success
2566  *		EINVAL
2567  *		EBADF
2568  *		EACCES			Mandatory Access Control failure
2569  *	copyin:EFAULT
2570  *	copyout:EFAULT
2571  *	file_socket:ENOTSOCK
2572  *	file_socket:EBADF
2573  *	sogetopt:???
2574  */
2575 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)2576 getsockopt(proc_ref_t p, struct getsockopt_args  *uap,
2577     __unused int32_ref_t retval)
2578 {
2579 	int             error;
2580 	socklen_t       valsize;
2581 	struct sockopt  sopt;
2582 	socket_ref_t so;
2583 
2584 	error = file_socket(uap->s, &so);
2585 	if (error) {
2586 		return error;
2587 	}
2588 	if (uap->val) {
2589 		error = copyin(uap->avalsize, (caddr_t)&valsize,
2590 		    sizeof(valsize));
2591 		if (error) {
2592 			goto out;
2593 		}
2594 		/* No bounds checking on size (it's unsigned) */
2595 	} else {
2596 		valsize = 0;
2597 	}
2598 	sopt.sopt_dir = SOPT_GET;
2599 	sopt.sopt_level = uap->level;
2600 	sopt.sopt_name = uap->name;
2601 	sopt.sopt_val = uap->val;
2602 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2603 	sopt.sopt_p = p;
2604 
2605 	if (so == NULL) {
2606 		error = EBADF;
2607 		goto out;
2608 	}
2609 #if CONFIG_MACF_SOCKET_SUBSET
2610 	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2611 	    &sopt)) != 0) {
2612 		goto out;
2613 	}
2614 #endif /* MAC_SOCKET_SUBSET */
2615 	error = sogetoptlock((struct socket *)so, &sopt, 1);    /* will lock */
2616 	if (error == 0) {
2617 		valsize = (socklen_t)sopt.sopt_valsize;
2618 		error = copyout((caddr_t)&valsize, uap->avalsize,
2619 		    sizeof(valsize));
2620 	}
2621 out:
2622 	file_drop(uap->s);
2623 	return error;
2624 }
2625 
2626 
2627 /*
2628  * Get socket name.
2629  *
2630  * Returns:	0			Success
2631  *		EBADF
2632  *	file_socket:ENOTSOCK
2633  *	file_socket:EBADF
2634  *	copyin:EFAULT
2635  *	copyout:EFAULT
2636  *	<pru_sockaddr>:ENOBUFS[TCP]
2637  *	<pru_sockaddr>:ECONNRESET[TCP]
2638  *	<pru_sockaddr>:EINVAL[AF_UNIX]
2639  *	<sf_getsockname>:???
2640  */
2641 /* ARGSUSED */
2642 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)2643 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
2644     __unused int32_ref_t retval)
2645 {
2646 	socket_ref_t so;
2647 	sockaddr_ref_t  sa;
2648 	socklen_t len;
2649 	socklen_t sa_len;
2650 	int error;
2651 
2652 	error = file_socket(uap->fdes, &so);
2653 	if (error) {
2654 		return error;
2655 	}
2656 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2657 	if (error) {
2658 		goto out;
2659 	}
2660 	if (so == NULL) {
2661 		error = EBADF;
2662 		goto out;
2663 	}
2664 	sa = 0;
2665 	socket_lock(so, 1);
2666 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2667 	if (error == 0) {
2668 		error = sflt_getsockname(so, &sa);
2669 		if (error == EJUSTRETURN) {
2670 			error = 0;
2671 		}
2672 	}
2673 	socket_unlock(so, 1);
2674 	if (error) {
2675 		goto bad;
2676 	}
2677 	if (sa == 0) {
2678 		len = 0;
2679 		goto gotnothing;
2680 	}
2681 
2682 	sa_len = sa->sa_len;
2683 	len = MIN(len, sa_len);
2684 	error = copyout((caddr_t)sa, uap->asa, len);
2685 	if (error) {
2686 		goto bad;
2687 	}
2688 	/* return the actual, untruncated address length */
2689 	len = sa_len;
2690 gotnothing:
2691 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2692 bad:
2693 	free_sockaddr(sa);
2694 out:
2695 	file_drop(uap->fdes);
2696 	return error;
2697 }
2698 
2699 /*
2700  * Get name of peer for connected socket.
2701  *
2702  * Returns:	0			Success
2703  *		EBADF
2704  *		EINVAL
2705  *		ENOTCONN
2706  *	file_socket:ENOTSOCK
2707  *	file_socket:EBADF
2708  *	copyin:EFAULT
2709  *	copyout:EFAULT
2710  *	<pru_peeraddr>:???
2711  *	<sf_getpeername>:???
2712  */
2713 /* ARGSUSED */
2714 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)2715 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
2716     __unused int32_ref_t retval)
2717 {
2718 	socket_ref_t so;
2719 	sockaddr_ref_t  sa;
2720 	socklen_t len;
2721 	socklen_t sa_len;
2722 	int error;
2723 
2724 	error = file_socket(uap->fdes, &so);
2725 	if (error) {
2726 		return error;
2727 	}
2728 	if (so == NULL) {
2729 		error = EBADF;
2730 		goto out;
2731 	}
2732 
2733 	socket_lock(so, 1);
2734 
2735 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2736 	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2737 		/* the socket has been shutdown, no more getpeername's */
2738 		socket_unlock(so, 1);
2739 		error = EINVAL;
2740 		goto out;
2741 	}
2742 
2743 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2744 		socket_unlock(so, 1);
2745 		error = ENOTCONN;
2746 		goto out;
2747 	}
2748 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2749 	if (error) {
2750 		socket_unlock(so, 1);
2751 		goto out;
2752 	}
2753 	sa = 0;
2754 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2755 	if (error == 0) {
2756 		error = sflt_getpeername(so, &sa);
2757 		if (error == EJUSTRETURN) {
2758 			error = 0;
2759 		}
2760 	}
2761 	socket_unlock(so, 1);
2762 	if (error) {
2763 		goto bad;
2764 	}
2765 	if (sa == 0) {
2766 		len = 0;
2767 		goto gotnothing;
2768 	}
2769 	sa_len = sa->sa_len;
2770 	len = MIN(len, sa_len);
2771 	error = copyout(sa, uap->asa, len);
2772 	if (error) {
2773 		goto bad;
2774 	}
2775 	/* return the actual, untruncated address length */
2776 	len = sa_len;
2777 gotnothing:
2778 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2779 bad:
2780 	free_sockaddr(sa);
2781 out:
2782 	file_drop(uap->fdes);
2783 	return error;
2784 }
2785 
2786 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)2787 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
2788 {
2789 	sockaddr_ref_t sa;
2790 	struct mbuf *m;
2791 	int error;
2792 	socklen_t alloc_buflen = buflen;
2793 
2794 	if (buflen > INT_MAX / 2) {
2795 		return EINVAL;
2796 	}
2797 	if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
2798 	    buflen < offsetof(struct sockaddr, sa_data[0]))) {
2799 		return EINVAL;
2800 	}
2801 	if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
2802 		return EINVAL;
2803 	}
2804 
2805 #ifdef __LP64__
2806 	/*
2807 	 * The fd's in the buffer must expand to be pointers, thus we need twice
2808 	 * as much space
2809 	 */
2810 	if (type == MT_CONTROL) {
2811 		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2812 		    sizeof(struct cmsghdr);
2813 	}
2814 #endif
2815 	if (alloc_buflen > MLEN) {
2816 		if (type == MT_SONAME && alloc_buflen <= 112) {
2817 			alloc_buflen = MLEN;    /* unix domain compat. hack */
2818 		} else if (alloc_buflen > MCLBYTES) {
2819 			return EINVAL;
2820 		}
2821 	}
2822 	m = m_get(M_WAIT, type);
2823 	if (m == NULL) {
2824 		return ENOBUFS;
2825 	}
2826 	if (alloc_buflen > MLEN) {
2827 		MCLGET(m, M_WAIT);
2828 		if ((m->m_flags & M_EXT) == 0) {
2829 			m_free(m);
2830 			return ENOBUFS;
2831 		}
2832 	}
2833 	/*
2834 	 * K64: We still copyin the original buflen because it gets expanded
2835 	 * later and we lie about the size of the mbuf because it only affects
2836 	 * unp_* functions
2837 	 */
2838 	m->m_len = buflen;
2839 	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2840 	if (error) {
2841 		(void) m_free(m);
2842 	} else {
2843 		*mp = m;
2844 		if (type == MT_SONAME) {
2845 			VERIFY(buflen <= SOCK_MAXADDRLEN);
2846 			sa = mtod(m, sockaddr_ref_t);
2847 			sa->sa_len = (__uint8_t)buflen;
2848 		}
2849 	}
2850 	return error;
2851 }
2852 
2853 /*
2854  * Given a user_addr_t of length len, allocate and fill out a *sa.
2855  *
2856  * Returns:	0			Success
2857  *		ENAMETOOLONG		Filename too long
2858  *		EINVAL			Invalid argument
2859  *		ENOMEM			Not enough space
2860  *		copyin:EFAULT		Bad address
2861  */
2862 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2863 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
2864     size_t len, boolean_t translate_unspec)
2865 {
2866 	sockaddr_ref_t  sa;
2867 	int error;
2868 
2869 	if (len > SOCK_MAXADDRLEN) {
2870 		return ENAMETOOLONG;
2871 	}
2872 
2873 	if (len < offsetof(struct sockaddr, sa_data[0])) {
2874 		return EINVAL;
2875 	}
2876 
2877 	sa = (sockaddr_ref_t)alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
2878 
2879 	error = copyin(uaddr, (caddr_t)sa, len);
2880 	if (error) {
2881 		free_sockaddr(sa);
2882 	} else {
2883 		/*
2884 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2885 		 * legacy applications that use AF_UNSPEC (0).  On all other
2886 		 * sockets we leave it unchanged and let the lower layer
2887 		 * handle it.
2888 		 */
2889 		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2890 		    SOCK_CHECK_DOM(so, PF_INET) &&
2891 		    len == sizeof(struct sockaddr_in)) {
2892 			sa->sa_family = AF_INET;
2893 		}
2894 		VERIFY(len <= SOCK_MAXADDRLEN);
2895 		sa = *&sa;
2896 		sa->sa_len = (__uint8_t)len;
2897 		*namp = sa;
2898 	}
2899 	return error;
2900 }
2901 
2902 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2903 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
2904     user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2905 {
2906 	int error;
2907 
2908 	if (ss == NULL || uaddr == USER_ADDR_NULL ||
2909 	    len < offsetof(struct sockaddr, sa_data[0])) {
2910 		return EINVAL;
2911 	}
2912 
2913 	/*
2914 	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2915 	 * so the check here is inclusive.
2916 	 */
2917 	if (len > sizeof(*ss)) {
2918 		return ENAMETOOLONG;
2919 	}
2920 
2921 	bzero(ss, sizeof(*ss));
2922 	error = copyin(uaddr, (caddr_t)ss, len);
2923 	if (error == 0) {
2924 		/*
2925 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2926 		 * legacy applications that use AF_UNSPEC (0).  On all other
2927 		 * sockets we leave it unchanged and let the lower layer
2928 		 * handle it.
2929 		 */
2930 		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2931 		    SOCK_CHECK_DOM(so, PF_INET) &&
2932 		    len == sizeof(struct sockaddr_in)) {
2933 			ss->ss_family = AF_INET;
2934 		}
2935 
2936 		ss->ss_len = (__uint8_t)len;
2937 	}
2938 	return error;
2939 }
2940 
2941 int
internalize_user_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,uio_ref_ptr_t uiop)2942 internalize_user_msghdr_array(const void_ptr_t src, int spacetype, int direction,
2943     u_int count, user_msghdr_x_ptr_t dst, uio_ref_ptr_t uiop)
2944 {
2945 	int error = 0;
2946 	u_int i;
2947 	u_int namecnt = 0;
2948 	u_int ctlcnt = 0;
2949 
2950 	for (i = 0; i < count; i++) {
2951 		uio_t auio;
2952 		struct user_iovec *iovp;
2953 		struct user_msghdr_x *user_msg = dst + i;
2954 
2955 		if (spacetype == UIO_USERSPACE64) {
2956 			const struct user64_msghdr_x *msghdr64;
2957 
2958 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2959 
2960 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2961 			user_msg->msg_namelen = msghdr64->msg_namelen;
2962 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2963 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
2964 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2965 			user_msg->msg_controllen = msghdr64->msg_controllen;
2966 			user_msg->msg_flags = msghdr64->msg_flags;
2967 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2968 		} else {
2969 			const struct user32_msghdr_x *msghdr32;
2970 
2971 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2972 
2973 			user_msg->msg_name = msghdr32->msg_name;
2974 			user_msg->msg_namelen = msghdr32->msg_namelen;
2975 			user_msg->msg_iov = msghdr32->msg_iov;
2976 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
2977 			user_msg->msg_control = msghdr32->msg_control;
2978 			user_msg->msg_controllen = msghdr32->msg_controllen;
2979 			user_msg->msg_flags = msghdr32->msg_flags;
2980 			user_msg->msg_datalen = msghdr32->msg_datalen;
2981 		}
2982 
2983 		if (user_msg->msg_iovlen <= 0 ||
2984 		    user_msg->msg_iovlen > UIO_MAXIOV) {
2985 			error = EMSGSIZE;
2986 			goto done;
2987 		}
2988 		auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2989 		    direction);
2990 		if (auio == NULL) {
2991 			error = ENOMEM;
2992 			goto done;
2993 		}
2994 		uiop[i] = auio;
2995 
2996 		iovp = uio_iovsaddr(auio);
2997 		if (iovp == NULL) {
2998 			error = ENOMEM;
2999 			goto done;
3000 		}
3001 		error = copyin_user_iovec_array(user_msg->msg_iov,
3002 		    spacetype, user_msg->msg_iovlen, iovp);
3003 		if (error) {
3004 			goto done;
3005 		}
3006 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3007 
3008 		error = uio_calculateresid(auio);
3009 		if (error) {
3010 			goto done;
3011 		}
3012 		user_msg->msg_datalen = uio_resid(auio);
3013 
3014 		if (user_msg->msg_name && user_msg->msg_namelen) {
3015 			namecnt++;
3016 		}
3017 		if (user_msg->msg_control && user_msg->msg_controllen) {
3018 			ctlcnt++;
3019 		}
3020 	}
3021 done:
3022 
3023 	return error;
3024 }
3025 
3026 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3027 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3028     u_int count, user_msghdr_x_ptr_t dst,
3029     recv_msg_elem_ptr_t recv_msg_array)
3030 {
3031 	int error = 0;
3032 	u_int i;
3033 
3034 	for (i = 0; i < count; i++) {
3035 		struct user_iovec *iovp;
3036 		struct user_msghdr_x *user_msg = dst + i;
3037 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3038 
3039 		if (spacetype == UIO_USERSPACE64) {
3040 			const struct user64_msghdr_x *msghdr64;
3041 
3042 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3043 
3044 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3045 			user_msg->msg_namelen = msghdr64->msg_namelen;
3046 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3047 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3048 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3049 			user_msg->msg_controllen = msghdr64->msg_controllen;
3050 			user_msg->msg_flags = msghdr64->msg_flags;
3051 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3052 		} else {
3053 			const struct user32_msghdr_x *msghdr32;
3054 
3055 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3056 
3057 			user_msg->msg_name = msghdr32->msg_name;
3058 			user_msg->msg_namelen = msghdr32->msg_namelen;
3059 			user_msg->msg_iov = msghdr32->msg_iov;
3060 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3061 			user_msg->msg_control = msghdr32->msg_control;
3062 			user_msg->msg_controllen = msghdr32->msg_controllen;
3063 			user_msg->msg_flags = msghdr32->msg_flags;
3064 			user_msg->msg_datalen = msghdr32->msg_datalen;
3065 		}
3066 
3067 		if (user_msg->msg_iovlen <= 0 ||
3068 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3069 			error = EMSGSIZE;
3070 			goto done;
3071 		}
3072 		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3073 		    spacetype, direction);
3074 		if (recv_msg_elem->uio == NULL) {
3075 			error = ENOMEM;
3076 			goto done;
3077 		}
3078 
3079 		iovp = uio_iovsaddr(recv_msg_elem->uio);
3080 		if (iovp == NULL) {
3081 			error = ENOMEM;
3082 			goto done;
3083 		}
3084 		error = copyin_user_iovec_array(user_msg->msg_iov,
3085 		    spacetype, user_msg->msg_iovlen, iovp);
3086 		if (error) {
3087 			goto done;
3088 		}
3089 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3090 
3091 		error = uio_calculateresid(recv_msg_elem->uio);
3092 		if (error) {
3093 			goto done;
3094 		}
3095 		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3096 
3097 		if (user_msg->msg_name && user_msg->msg_namelen) {
3098 			recv_msg_elem->which |= SOCK_MSG_SA;
3099 		}
3100 		if (user_msg->msg_control && user_msg->msg_controllen) {
3101 			recv_msg_elem->which |= SOCK_MSG_CONTROL;
3102 		}
3103 	}
3104 done:
3105 
3106 	return error;
3107 }
3108 
3109 void
externalize_user_msghdr_array(void_ptr_t dst,int spacetype,int direction,u_int count,const user_msghdr_x_ptr_t src,uio_ref_ptr_t uiop)3110 externalize_user_msghdr_array(void_ptr_t dst, int spacetype, int direction,
3111     u_int count, const user_msghdr_x_ptr_t src, uio_ref_ptr_t uiop)
3112 {
3113 #pragma unused(direction)
3114 	u_int i;
3115 
3116 	for (i = 0; i < count; i++) {
3117 		const struct user_msghdr_x *user_msg = src + i;
3118 		uio_t auio = uiop[i];
3119 		user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3120 
3121 		if (spacetype == UIO_USERSPACE64) {
3122 			struct user64_msghdr_x *msghdr64;
3123 
3124 			msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3125 
3126 			msghdr64->msg_flags = user_msg->msg_flags;
3127 			msghdr64->msg_datalen = len;
3128 		} else {
3129 			struct user32_msghdr_x *msghdr32;
3130 
3131 			msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3132 
3133 			msghdr32->msg_flags = user_msg->msg_flags;
3134 			msghdr32->msg_datalen = (user32_size_t)len;
3135 		}
3136 	}
3137 }
3138 
3139 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3140 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3141     u_int count, user_msghdr_x_ptr_t src,
3142     recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3143 {
3144 	u_int i;
3145 	u_int retcnt = 0;
3146 	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3147 
3148 	*ret_error = 0;
3149 
3150 	for (i = 0; i < count; i++) {
3151 		struct user_msghdr_x *user_msg = src + i;
3152 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3153 		user_ssize_t len = 0;
3154 		int error;
3155 
3156 		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3157 
3158 		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3159 			retcnt++;
3160 
3161 			if (recv_msg_elem->which & SOCK_MSG_SA) {
3162 				error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3163 				    &user_msg->msg_namelen);
3164 				if (error != 0) {
3165 					*ret_error = error;
3166 					return 0;
3167 				}
3168 			}
3169 			if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3170 				error = copyout_control(p, recv_msg_elem->controlp,
3171 				    user_msg->msg_control, &user_msg->msg_controllen,
3172 				    &recv_msg_elem->flags, so);
3173 				if (error != 0) {
3174 					*ret_error = error;
3175 					return 0;
3176 				}
3177 			}
3178 		}
3179 
3180 		if (spacetype == UIO_USERSPACE64) {
3181 			struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3182 
3183 			msghdr64->msg_namelen = user_msg->msg_namelen;
3184 			msghdr64->msg_controllen = user_msg->msg_controllen;
3185 			msghdr64->msg_flags = recv_msg_elem->flags;
3186 			msghdr64->msg_datalen = len;
3187 		} else {
3188 			struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3189 
3190 			msghdr32->msg_namelen = user_msg->msg_namelen;
3191 			msghdr32->msg_controllen = user_msg->msg_controllen;
3192 			msghdr32->msg_flags = recv_msg_elem->flags;
3193 			msghdr32->msg_datalen = (user32_size_t)len;
3194 		}
3195 	}
3196 	return retcnt;
3197 }
3198 
3199 void
free_uio_array(uio_ref_ptr_t uiop,u_int count)3200 free_uio_array(uio_ref_ptr_t uiop, u_int count)
3201 {
3202 	u_int i;
3203 
3204 	for (i = 0; i < count; i++) {
3205 		if (uiop[i] != NULL) {
3206 			uio_free(uiop[i]);
3207 		}
3208 	}
3209 }
3210 
3211 /* Extern linkage requires using __counted_by instead of bptr */
3212 __private_extern__ user_ssize_t
uio_array_resid(uio_ref_t * __counted_by (count)uiop,u_int count)3213 uio_array_resid(uio_ref_t * __counted_by(count)uiop, u_int count)
3214 {
3215 	user_ssize_t len = 0;
3216 	u_int i;
3217 
3218 	for (i = 0; i < count; i++) {
3219 		struct uio *auio = uiop[i];
3220 
3221 		if (auio != NULL) {
3222 			len += uio_resid(auio);
3223 		}
3224 	}
3225 	return len;
3226 }
3227 
3228 static boolean_t
uio_array_is_valid(uio_ref_ptr_t uiop,u_int count)3229 uio_array_is_valid(uio_ref_ptr_t uiop, u_int count)
3230 {
3231 	user_ssize_t len = 0;
3232 	u_int i;
3233 
3234 	for (i = 0; i < count; i++) {
3235 		struct uio *auio = uiop[i];
3236 
3237 		if (auio != NULL) {
3238 			user_ssize_t resid = uio_resid(auio);
3239 
3240 			/*
3241 			 * Sanity check on the validity of the iovec:
3242 			 * no point of going over sb_max
3243 			 */
3244 			if (resid < 0 || resid > (user_ssize_t)sb_max) {
3245 				return false;
3246 			}
3247 
3248 			len += resid;
3249 			if (len < 0 || len > (user_ssize_t)sb_max) {
3250 				return false;
3251 			}
3252 		}
3253 	}
3254 	return true;
3255 }
3256 
3257 
3258 recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)3259 alloc_recv_msg_array(u_int count)
3260 {
3261 	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3262 }
3263 
3264 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3265 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3266 {
3267 	if (recv_msg_array == NULL) {
3268 		return;
3269 	}
3270 	for (uint32_t i = 0; i < count; i++) {
3271 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3272 
3273 		if (recv_msg_elem->uio != NULL) {
3274 			uio_free(recv_msg_elem->uio);
3275 		}
3276 		free_sockaddr(recv_msg_elem->psa);
3277 		if (recv_msg_elem->controlp != NULL) {
3278 			m_freem(recv_msg_elem->controlp);
3279 		}
3280 	}
3281 	kfree_type(struct recv_msg_elem, count, recv_msg_array);
3282 }
3283 
3284 
3285 /* Extern linkage requires using __counted_by instead of bptr */
3286 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3287 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3288 {
3289 	user_ssize_t len = 0;
3290 	u_int i;
3291 
3292 	for (i = 0; i < count; i++) {
3293 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3294 
3295 		if (recv_msg_elem->uio != NULL) {
3296 			len += uio_resid(recv_msg_elem->uio);
3297 		}
3298 	}
3299 	return len;
3300 }
3301 
3302 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)3303 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3304 {
3305 	user_ssize_t len = 0;
3306 	u_int i;
3307 
3308 	for (i = 0; i < count; i++) {
3309 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3310 
3311 		if (recv_msg_elem->uio != NULL) {
3312 			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3313 
3314 			/*
3315 			 * Sanity check on the validity of the iovec:
3316 			 * no point of going over sb_max
3317 			 */
3318 			if (resid < 0 || (u_int32_t)resid > sb_max) {
3319 				return 0;
3320 			}
3321 
3322 			len += resid;
3323 			if (len < 0 || (u_int32_t)len > sb_max) {
3324 				return 0;
3325 			}
3326 		}
3327 	}
3328 	return 1;
3329 }
3330 
3331 #if SENDFILE
3332 
/* Number of iovec slots in the on-stack uio used by sendfile() per pass */
#define SFUIOBUFS 64

/*
 * Number of 16KB / 4KB clusters needed to hold n bytes (n divided by the
 * cluster size, rounded up).  n is converted to unsigned int before the
 * subtraction, so n == 0 wraps; callers are expected to pass n > 0.
 */
#define HOWMANY_16K(n)  (1 + (((unsigned int)(n) - 1) >> M16KCLSHIFT))
#define HOWMANY_4K(n)   (1 + (((unsigned int)(n) - 1) >> MBIGCLSHIFT))

/* Most bytes sendfile() moves in one pass: SFUIOBUFS pages */
#define SENDFILE_MAX_BYTES      ((SFUIOBUFS) << PGSHIFT)

/* The same limit expressed in mbuf clusters of each size */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3345 
3346 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)3347 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3348     mbuf_ref_ref_t m, boolean_t jumbocl)
3349 {
3350 	unsigned int needed;
3351 
3352 	if (pktlen == 0) {
3353 		panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3354 	}
3355 
3356 	/*
3357 	 * Try to allocate for the whole thing.  Since we want full control
3358 	 * over the buffer size and be able to accept partial result, we can't
3359 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
3360 	 */
3361 	*m = NULL;
3362 	if (pktlen > MBIGCLBYTES && jumbocl) {
3363 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3364 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3365 	}
3366 	if (*m == NULL) {
3367 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3368 		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3369 	}
3370 
3371 	/*
3372 	 * Our previous attempt(s) at allocation had failed; the system
3373 	 * may be short on mbufs, and we want to block until they are
3374 	 * available.  This time, ask just for 1 mbuf and don't return
3375 	 * until we get it.
3376 	 */
3377 	if (*m == NULL) {
3378 		needed = 1;
3379 		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3380 	}
3381 	if (*m == NULL) {
3382 		panic("%s: blocking allocation returned NULL", __func__);
3383 	}
3384 
3385 	*maxchunks = needed;
3386 }
3387 
3388 /*
3389  * sendfile(2).
3390  * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3391  *	 struct sf_hdtr *hdtr, int flags)
3392  *
3393  * Send a file specified by 'fd' and starting at 'offset' to a socket
3394  * specified by 's'. Send only '*nbytes' of the file or until EOF if
3395  * *nbytes == 0. Optionally add a header and/or trailer to the socket
3396  * output. If specified, write the total number of bytes sent into *nbytes.
3397  */
3398 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)3399 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
3400 {
3401 	fileproc_ref_t  fp;
3402 	vnode_ref_t  vp;
3403 	socket_ref_t so;
3404 	struct writev_nocancel_args nuap;
3405 	user_ssize_t writev_retval;
3406 	struct user_sf_hdtr user_hdtr;
3407 	struct user32_sf_hdtr user32_hdtr;
3408 	struct user64_sf_hdtr user64_hdtr;
3409 	off_t off, xfsize;
3410 	off_t nbytes = 0, sbytes = 0;
3411 	int error = 0;
3412 	size_t sizeof_hdtr;
3413 	off_t file_size;
3414 	struct vfs_context context = *vfs_context_current();
3415 
3416 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3417 
3418 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3419 	    0, 0, 0, 0);
3420 
3421 	AUDIT_ARG(fd, uap->fd);
3422 	AUDIT_ARG(value32, uap->s);
3423 
3424 	/*
3425 	 * Do argument checking. Must be a regular file in, stream
3426 	 * type and connected socket out, positive offset.
3427 	 */
3428 	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3429 		goto done;
3430 	}
3431 	if ((fp->f_flag & FREAD) == 0) {
3432 		error = EBADF;
3433 		goto done1;
3434 	}
3435 	if (vnode_isreg(vp) == 0) {
3436 		error = ENOTSUP;
3437 		goto done1;
3438 	}
3439 	error = file_socket(uap->s, &so);
3440 	if (error) {
3441 		goto done1;
3442 	}
3443 	if (so == NULL) {
3444 		error = EBADF;
3445 		goto done2;
3446 	}
3447 	if (so->so_type != SOCK_STREAM) {
3448 		error = EINVAL;
3449 		goto done2;
3450 	}
3451 	if ((so->so_state & SS_ISCONNECTED) == 0) {
3452 		error = ENOTCONN;
3453 		goto done2;
3454 	}
3455 	if (uap->offset < 0) {
3456 		error = EINVAL;
3457 		goto done2;
3458 	}
3459 	if (uap->nbytes == USER_ADDR_NULL) {
3460 		error = EINVAL;
3461 		goto done2;
3462 	}
3463 	if (uap->flags != 0) {
3464 		error = EINVAL;
3465 		goto done2;
3466 	}
3467 
3468 	context.vc_ucred = fp->fp_glob->fg_cred;
3469 
3470 #if CONFIG_MACF_SOCKET_SUBSET
3471 	/* JMM - fetch connected sockaddr? */
3472 	error = mac_socket_check_send(context.vc_ucred, so, NULL);
3473 	if (error) {
3474 		goto done2;
3475 	}
3476 #endif
3477 
3478 	/*
3479 	 * Get number of bytes to send
3480 	 * Should it applies to size of header and trailer?
3481 	 */
3482 	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3483 	if (error) {
3484 		goto done2;
3485 	}
3486 
3487 	/*
3488 	 * If specified, get the pointer to the sf_hdtr struct for
3489 	 * any headers/trailers.
3490 	 */
3491 	if (uap->hdtr != USER_ADDR_NULL) {
3492 		caddr_t hdtrp;
3493 
3494 		bzero(&user_hdtr, sizeof(user_hdtr));
3495 		if (is_p_64bit_process) {
3496 			hdtrp = (caddr_t)&user64_hdtr;
3497 			sizeof_hdtr = sizeof(user64_hdtr);
3498 		} else {
3499 			hdtrp = (caddr_t)&user32_hdtr;
3500 			sizeof_hdtr = sizeof(user32_hdtr);
3501 		}
3502 		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3503 		if (error) {
3504 			goto done2;
3505 		}
3506 		if (is_p_64bit_process) {
3507 			user_hdtr.headers = user64_hdtr.headers;
3508 			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3509 			user_hdtr.trailers = user64_hdtr.trailers;
3510 			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3511 		} else {
3512 			user_hdtr.headers = user32_hdtr.headers;
3513 			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3514 			user_hdtr.trailers = user32_hdtr.trailers;
3515 			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3516 		}
3517 
3518 		/*
3519 		 * Send any headers. Wimp out and use writev(2).
3520 		 */
3521 		if (user_hdtr.headers != USER_ADDR_NULL) {
3522 			bzero(&nuap, sizeof(struct writev_args));
3523 			nuap.fd = uap->s;
3524 			nuap.iovp = user_hdtr.headers;
3525 			nuap.iovcnt = user_hdtr.hdr_cnt;
3526 			error = writev_nocancel(p, &nuap, &writev_retval);
3527 			if (error) {
3528 				goto done2;
3529 			}
3530 			sbytes += writev_retval;
3531 		}
3532 	}
3533 
3534 	/*
3535 	 * Get the file size for 2 reasons:
3536 	 *  1. We don't want to allocate more mbufs than necessary
3537 	 *  2. We don't want to read past the end of file
3538 	 */
3539 	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3540 		goto done2;
3541 	}
3542 
3543 	/*
3544 	 * Simply read file data into a chain of mbufs that used with scatter
3545 	 * gather reads. We're not (yet?) setup to use zero copy external
3546 	 * mbufs that point to the file pages.
3547 	 */
3548 	socket_lock(so, 1);
3549 	error = sblock(&so->so_snd, SBL_WAIT);
3550 	if (error) {
3551 		socket_unlock(so, 1);
3552 		goto done2;
3553 	}
3554 	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3555 		mbuf_ref_t m0 = NULL;
3556 		mbuf_t  m;
3557 		unsigned int    nbufs = SFUIOBUFS, i;
3558 		uio_t   auio;
3559 		uio_stackbuf_t    uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3560 		size_t  uiolen;
3561 		user_ssize_t    rlen;
3562 		off_t   pgoff;
3563 		size_t  pktlen;
3564 		boolean_t jumbocl;
3565 
3566 		/*
3567 		 * Calculate the amount to transfer.
3568 		 * Align to round number of pages.
3569 		 * Not to exceed send socket buffer,
3570 		 * the EOF, or the passed in nbytes.
3571 		 */
3572 		xfsize = sbspace(&so->so_snd);
3573 
3574 		if (xfsize <= 0) {
3575 			if (so->so_state & SS_CANTSENDMORE) {
3576 				error = EPIPE;
3577 				goto done3;
3578 			} else if ((so->so_state & SS_NBIO)) {
3579 				error = EAGAIN;
3580 				goto done3;
3581 			} else {
3582 				xfsize = PAGE_SIZE;
3583 			}
3584 		}
3585 
3586 		if (xfsize > SENDFILE_MAX_BYTES) {
3587 			xfsize = SENDFILE_MAX_BYTES;
3588 		} else if (xfsize > PAGE_SIZE) {
3589 			xfsize = trunc_page(xfsize);
3590 		}
3591 		pgoff = off & PAGE_MASK_64;
3592 		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3593 			xfsize = PAGE_SIZE_64 - pgoff;
3594 		}
3595 		if (nbytes && xfsize > (nbytes - sbytes)) {
3596 			xfsize = nbytes - sbytes;
3597 		}
3598 		if (xfsize <= 0) {
3599 			break;
3600 		}
3601 		if (off + xfsize > file_size) {
3602 			xfsize = file_size - off;
3603 		}
3604 		if (xfsize <= 0) {
3605 			break;
3606 		}
3607 
3608 		/*
3609 		 * Attempt to use larger than system page-size clusters for
3610 		 * large writes only if there is a jumbo cluster pool and
3611 		 * if the socket is marked accordingly.
3612 		 */
3613 		jumbocl = sosendjcl && njcl > 0 &&
3614 		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3615 
3616 		socket_unlock(so, 0);
3617 		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3618 		pktlen = mbuf_pkthdr_maxlen(m0);
3619 		if (pktlen < (size_t)xfsize) {
3620 			xfsize = pktlen;
3621 		}
3622 
3623 		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3624 		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
3625 		if (auio == NULL) {
3626 			printf("sendfile failed. nbufs = %d. %s", nbufs,
3627 			    "File a radar related to rdar://10146739.\n");
3628 			mbuf_freem(m0);
3629 			error = ENXIO;
3630 			socket_lock(so, 0);
3631 			goto done3;
3632 		}
3633 
3634 		for (i = 0, m = m0, uiolen = 0;
3635 		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3636 		    i++, m = mbuf_next(m)) {
3637 			size_t mlen = mbuf_maxlen(m);
3638 
3639 			if (mlen + uiolen > (size_t)xfsize) {
3640 				mlen = xfsize - uiolen;
3641 			}
3642 			mbuf_setlen(m, mlen);
3643 			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3644 			    mlen);
3645 			uiolen += mlen;
3646 		}
3647 
3648 		if (xfsize != uio_resid(auio)) {
3649 			printf("sendfile: xfsize: %lld != uio_resid(auio): "
3650 			    "%lld\n", xfsize, (long long)uio_resid(auio));
3651 		}
3652 
3653 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3654 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3655 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3656 		error = fo_read(fp, auio, FOF_OFFSET, &context);
3657 		socket_lock(so, 0);
3658 		if (error != 0) {
3659 			if (uio_resid(auio) != xfsize && (error == ERESTART ||
3660 			    error == EINTR || error == EWOULDBLOCK)) {
3661 				error = 0;
3662 			} else {
3663 				mbuf_freem(m0);
3664 				goto done3;
3665 			}
3666 		}
3667 		xfsize -= uio_resid(auio);
3668 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3669 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3670 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3671 
3672 		if (xfsize == 0) {
3673 			// printf("sendfile: fo_read 0 bytes, EOF\n");
3674 			break;
3675 		}
3676 		if (xfsize + off > file_size) {
3677 			printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3678 			    "%lld\n", xfsize, off, file_size);
3679 		}
3680 		for (i = 0, m = m0, rlen = 0;
3681 		    i < nbufs && m != NULL && rlen < xfsize;
3682 		    i++, m = mbuf_next(m)) {
3683 			size_t mlen = mbuf_maxlen(m);
3684 
3685 			if (rlen + mlen > (size_t)xfsize) {
3686 				mlen = xfsize - rlen;
3687 			}
3688 			mbuf_setlen(m, mlen);
3689 
3690 			rlen += mlen;
3691 		}
3692 		mbuf_pkthdr_setlen(m0, xfsize);
3693 
3694 retry_space:
3695 		/*
3696 		 * Make sure that the socket is still able to take more data.
3697 		 * CANTSENDMORE being true usually means that the connection
3698 		 * was closed. so_error is true when an error was sensed after
3699 		 * a previous send.
3700 		 * The state is checked after the page mapping and buffer
3701 		 * allocation above since those operations may block and make
3702 		 * any socket checks stale. From this point forward, nothing
3703 		 * blocks before the pru_send (or more accurately, any blocking
3704 		 * results in a loop back to here to re-check).
3705 		 */
3706 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3707 			if (so->so_state & SS_CANTSENDMORE) {
3708 				error = EPIPE;
3709 			} else {
3710 				error = so->so_error;
3711 				so->so_error = 0;
3712 			}
3713 			m_freem(m0);
3714 			goto done3;
3715 		}
3716 		/*
3717 		 * Wait for socket space to become available. We do this just
3718 		 * after checking the connection state above in order to avoid
3719 		 * a race condition with sbwait().
3720 		 */
3721 		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3722 			if (so->so_state & SS_NBIO) {
3723 				m_freem(m0);
3724 				error = EAGAIN;
3725 				goto done3;
3726 			}
3727 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3728 			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3729 			error = sbwait(&so->so_snd);
3730 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3731 			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3732 			/*
3733 			 * An error from sbwait usually indicates that we've
3734 			 * been interrupted by a signal. If we've sent anything
3735 			 * then return bytes sent, otherwise return the error.
3736 			 */
3737 			if (error) {
3738 				m_freem(m0);
3739 				goto done3;
3740 			}
3741 			goto retry_space;
3742 		}
3743 
3744 		mbuf_ref_t  control = NULL;
3745 		{
3746 			/*
3747 			 * Socket filter processing
3748 			 */
3749 
3750 			error = sflt_data_out(so, NULL, &m0, &control, 0);
3751 			if (error) {
3752 				if (error == EJUSTRETURN) {
3753 					error = 0;
3754 					continue;
3755 				}
3756 				goto done3;
3757 			}
3758 			/*
3759 			 * End Socket filter processing
3760 			 */
3761 		}
3762 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3763 		    uap->s, 0, 0, 0, 0);
3764 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3765 		    NULL, control, p);
3766 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3767 		    uap->s, 0, 0, 0, 0);
3768 		if (error) {
3769 			goto done3;
3770 		}
3771 	}
3772 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3773 	/*
3774 	 * Send trailers. Wimp out and use writev(2).
3775 	 */
3776 	if (uap->hdtr != USER_ADDR_NULL &&
3777 	    user_hdtr.trailers != USER_ADDR_NULL) {
3778 		bzero(&nuap, sizeof(struct writev_args));
3779 		nuap.fd = uap->s;
3780 		nuap.iovp = user_hdtr.trailers;
3781 		nuap.iovcnt = user_hdtr.trl_cnt;
3782 		error = writev_nocancel(p, &nuap, &writev_retval);
3783 		if (error) {
3784 			goto done2;
3785 		}
3786 		sbytes += writev_retval;
3787 	}
3788 done2:
3789 	file_drop(uap->s);
3790 done1:
3791 	file_drop(uap->fd);
3792 done:
3793 	if (uap->nbytes != USER_ADDR_NULL) {
3794 		/* XXX this appears bogus for some early failure conditions */
3795 		copyout(&sbytes, uap->nbytes, sizeof(off_t));
3796 	}
3797 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3798 	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3799 	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3800 	return error;
3801 done3:
3802 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3803 	goto done2;
3804 }
3805 
3806 
3807 #endif /* SENDFILE */
3808