xref: /xnu-8020.101.4/bsd/kern/uipc_syscalls.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * sendfile(2) and related extensions:
33  * Copyright (c) 1998, David Greenman. All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64  */
65 /*
66  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67  * support for mandatory and extensible security protections.  This notice
68  * is included in support of clause 2.2 (b) of the Apple Public License,
69  * Version 2.0.
70  */
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94 
95 #include <security/audit/audit.h>
96 
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102 
103 #include <os/log.h>
104 #include <os/ptrtools.h>
105 
106 #include <os/log.h>
107 
108 #if CONFIG_MACF_SOCKET_SUBSET
109 #include <security/mac_framework.h>
110 #endif /* MAC_SOCKET_SUBSET */
111 
/*
 * Shorthand for reaching the flag word and the operations vector of a
 * struct fileproc through its fp_glob member, so that code below can
 * write fp->f_flag and fp->f_ops.
 */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops

/*
 * Trace codes used by this file.  Every code is built with NETDBG_CODE()
 * in the DBG_NETSOCK class; the second argument selects the individual
 * event.
 */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * Debug helpers.  On DEBUG and DEVELOPMENT builds DEBUG_KERNEL_ADDRPERM()
 * passes its argument through unchanged and DBG_PRINTF() forwards to
 * printf().  On all other builds the value is routed through
 * VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to an empty statement.
 */
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
139 
/*
 * Forward declarations of the helpers that are private (static) to this
 * file.
 */

/* Common send/receive back ends. */
static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
    int, int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    int32_t *);
/* Connection and socket-address helpers. */
static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
/* Only declared when the kernel is built with SENDFILE. */
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr *,
    struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
/* Shared implementation behind socket() and socket_delegate(). */
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

/* Conversions between user and kernel struct user_msghdr_x arrays. */
static int internalize_user_msghdr_array(const void *, int, int, u_int,
    struct user_msghdr_x *, struct uio **);
static u_int externalize_user_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct uio **);

/* Helpers for arrays of struct uio and struct recv_msg_elem. */
static void free_uio_array(struct uio **, u_int);
static boolean_t uio_array_is_valid(struct uio **, u_int);
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
static u_int externalize_recv_msghdr_array(struct proc *, struct socket *, void *, u_int,
    struct user_msghdr_x *, struct recv_msg_elem *, int *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);
175 
/* Parent sysctl node under which the variables below are registered. */
SYSCTL_DECL(_kern_ipc);

/*
 * Read-write unsigned tunables "maxsendmsgx" and "maxrecvmsgx", both
 * initialised to 100.
 * NOTE(review): presumably the per-call message limits for the
 * sendmsg_x()/recvmsg_x() paths; their consumers are not in this part
 * of the file -- confirm.
 */
static u_int somaxsendmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
static u_int somaxrecvmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");

/*
 * Read-only unsigned value "missingpktinfo", starting at zero.
 * NOTE(review): looks like an event counter; the code that updates it
 * is not in this part of the file -- confirm what it counts.
 */
static u_int missingpktinfo = 0;
SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
    CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");

/*
 * System call interface to the socket abstraction.
 */

/* File operations vector installed on every descriptor created below. */
extern const struct fileops socketops;
194 
195 /*
196  * Returns:	0			Success
197  *		EACCES			Mandatory Access Control failure
198  *	falloc:ENFILE
199  *	falloc:EMFILE
200  *	falloc:ENOMEM
201  *	socreate:EAFNOSUPPORT
202  *	socreate:EPROTOTYPE
203  *	socreate:EPROTONOSUPPORT
204  *	socreate:ENOBUFS
205  *	socreate:ENOMEM
206  *	socreate:???			[other protocol families, IPSEC]
207  */
208 int
socket(struct proc * p,struct socket_args * uap,int32_t * retval)209 socket(struct proc *p,
210     struct socket_args *uap,
211     int32_t *retval)
212 {
213 	return socket_common(p, uap->domain, uap->type, uap->protocol,
214 	           proc_selfpid(), retval, 0);
215 }
216 
217 int
socket_delegate(struct proc * p,struct socket_delegate_args * uap,int32_t * retval)218 socket_delegate(struct proc *p,
219     struct socket_delegate_args *uap,
220     int32_t *retval)
221 {
222 	return socket_common(p, uap->domain, uap->type, uap->protocol,
223 	           uap->epid, retval, 1);
224 }
225 
226 static int
socket_common(struct proc * p,int domain,int type,int protocol,pid_t epid,int32_t * retval,int delegate)227 socket_common(struct proc *p,
228     int domain,
229     int type,
230     int protocol,
231     pid_t epid,
232     int32_t *retval,
233     int delegate)
234 {
235 	struct socket *so;
236 	struct fileproc *fp;
237 	int fd, error;
238 
239 	AUDIT_ARG(socket, domain, type, protocol);
240 #if CONFIG_MACF_SOCKET_SUBSET
241 	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
242 	    type, protocol)) != 0) {
243 		return error;
244 	}
245 #endif /* MAC_SOCKET_SUBSET */
246 
247 	if (delegate) {
248 		error = priv_check_cred(kauth_cred_get(),
249 		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
250 		if (error) {
251 			return EACCES;
252 		}
253 	}
254 
255 	error = falloc(p, &fp, &fd, vfs_context_current());
256 	if (error) {
257 		return error;
258 	}
259 	fp->f_flag = FREAD | FWRITE;
260 	fp->f_ops = &socketops;
261 
262 	if (delegate) {
263 		error = socreate_delegate(domain, &so, type, protocol, epid);
264 	} else {
265 		error = socreate(domain, &so, type, protocol);
266 	}
267 
268 	if (error) {
269 		fp_free(p, fd, fp);
270 	} else {
271 		fp_set_data(fp, so);
272 
273 		proc_fdlock(p);
274 		procfdtbl_releasefd(p, fd, NULL);
275 
276 		fp_drop(p, fd, fp, 1);
277 		proc_fdunlock(p);
278 
279 		*retval = fd;
280 		if (ENTR_SHOULDTRACE) {
281 			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
282 			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
283 		}
284 	}
285 	return error;
286 }
287 
288 /*
289  * Returns:	0			Success
290  *		EDESTADDRREQ		Destination address required
291  *		EBADF			Bad file descriptor
292  *		EACCES			Mandatory Access Control failure
293  *	file_socket:ENOTSOCK
294  *	file_socket:EBADF
295  *	getsockaddr:ENAMETOOLONG	Filename too long
296  *	getsockaddr:EINVAL		Invalid argument
297  *	getsockaddr:ENOMEM		Not enough space
298  *	getsockaddr:EFAULT		Bad address
299  *	sobindlock:???
300  */
301 /* ARGSUSED */
302 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_t * retval)303 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
304 {
305 	struct sockaddr_storage ss;
306 	struct sockaddr *sa = NULL;
307 	struct socket *so;
308 	boolean_t want_free = TRUE;
309 	int error;
310 
311 	AUDIT_ARG(fd, uap->s);
312 	error = file_socket(uap->s, &so);
313 	if (error != 0) {
314 		return error;
315 	}
316 	if (so == NULL) {
317 		error = EBADF;
318 		goto out;
319 	}
320 	if (uap->name == USER_ADDR_NULL) {
321 		error = EDESTADDRREQ;
322 		goto out;
323 	}
324 	if (uap->namelen > sizeof(ss)) {
325 		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
326 	} else {
327 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
328 		if (error == 0) {
329 			sa = (struct sockaddr *)&ss;
330 			want_free = FALSE;
331 		}
332 	}
333 	if (error != 0) {
334 		goto out;
335 	}
336 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
337 #if CONFIG_MACF_SOCKET_SUBSET
338 	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
339 	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
340 		error = sobindlock(so, sa, 1);  /* will lock socket */
341 	}
342 #else
343 	error = sobindlock(so, sa, 1);          /* will lock socket */
344 #endif /* MAC_SOCKET_SUBSET */
345 	if (want_free) {
346 		free_sockaddr(sa);
347 	}
348 out:
349 	file_drop(uap->s);
350 	return error;
351 }
352 
353 /*
354  * Returns:	0			Success
355  *		EBADF
356  *		EACCES			Mandatory Access Control failure
357  *	file_socket:ENOTSOCK
358  *	file_socket:EBADF
359  *	solisten:EINVAL
360  *	solisten:EOPNOTSUPP
361  *	solisten:???
362  */
363 int
listen(__unused struct proc * p,struct listen_args * uap,__unused int32_t * retval)364 listen(__unused struct proc *p, struct listen_args *uap,
365     __unused int32_t *retval)
366 {
367 	int error;
368 	struct socket *so;
369 
370 	AUDIT_ARG(fd, uap->s);
371 	error = file_socket(uap->s, &so);
372 	if (error) {
373 		return error;
374 	}
375 	if (so != NULL)
376 #if CONFIG_MACF_SOCKET_SUBSET
377 	{
378 		error = mac_socket_check_listen(kauth_cred_get(), so);
379 		if (error == 0) {
380 			error = solisten(so, uap->backlog);
381 		}
382 	}
383 #else
384 	{ error = solisten(so, uap->backlog);}
385 #endif /* MAC_SOCKET_SUBSET */
386 	else {
387 		error = EBADF;
388 	}
389 
390 	file_drop(uap->s);
391 	return error;
392 }
393 
/*
 * accept system call, without the pthread cancellation point.
 *
 * Takes the first completed connection off the so_comp queue of the
 * listening socket behind descriptor uap->s, allocates a new descriptor
 * for it and, when uap->name is non-NULL, copies the address produced by
 * soacceptlock() and its length out to uap->name / uap->anamelen.
 *
 * Parameters:	p		calling process
 *		uap->s		listening socket descriptor
 *		uap->name	optional user buffer for the address
 *		uap->anamelen	in: size of uap->name, out: address length
 *		retval		set to -1 on entry, to the new descriptor
 *				once one has been allocated
 *
 * Returns:	fp_get_ftype:EBADF	Bad file descriptor
 *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
 *		:EFAULT			Bad address on copyin/copyout
 *		:EBADF			Bad file descriptor
 *		:EOPNOTSUPP		Operation not supported on socket
 *		:EINVAL			Invalid argument
 *		:EWOULDBLOCK		Operation would block
 *		:ECONNABORTED		Connection aborted
 *		:EINTR			Interrupted function
 *		:EACCES			Mandatory Access Control failure
 *		falloc:ENFILE		Too many files open in system
 *		falloc:EMFILE		Too many open files
 *		falloc:ENOMEM		Not enough space
 *		0			Success
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    int32_t *retval)
{
	struct fileproc *fp;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so = NULL;
	lck_mtx_t *mutex_held;
	int fd = uap->s;
	int newfd;
	unsigned int fflag;
	int dosocklock = 0;

	*retval = -1;

	AUDIT_ARG(fd, uap->s);

	/* The caller's buffer length is only read when a buffer was given. */
	if (uap->name) {
		error = copyin(uap->anamelen, (caddr_t)&namelen,
		    sizeof(socklen_t));
		if (error) {
			return error;
		}
	}
	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
	if (error) {
		return error;
	}
	head = (struct socket *)fp_get_data(fp);

#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	socket_lock(head, 1);

	/*
	 * Pick the mutex to sleep on: the protocol's own lock when it
	 * provides pr_getlock, otherwise the domain-wide mutex.  dosocklock
	 * remembers which case applies; it later decides whether the new
	 * socket is locked separately from head.
	 */
	if (head->so_proto->pr_getlock != NULL) {
		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
		dosocklock = 1;
	} else {
		mutex_held = head->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
			error = EOPNOTSUPP;
		} else {
			/* POSIX: The socket is not accepting connections */
			error = EINVAL;
		}
		socket_unlock(head, 1);
		goto out;
	}
check_again:
	/* Non-blocking socket with nothing queued: fail immediately. */
	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
		socket_unlock(head, 1);
		error = EWOULDBLOCK;
		goto out;
	}
	/* Sleep until a connection completes or an error is posted. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if (head->so_usecount < 1) {
			panic("accept: head=%p refcount=%d", head,
			    head->so_usecount);
		}
		error = msleep((caddr_t)&head->so_timeo, mutex_held,
		    PSOCK | PCATCH, "accept", 0);
		if (head->so_usecount < 1) {
			panic("accept: 2 head=%p refcount=%d", head,
			    head->so_usecount);
		}
		/* A draining socket overrides whatever msleep() returned. */
		if ((head->so_state & SS_DRAINING)) {
			error = ECONNABORTED;
		}
		if (error) {
			socket_unlock(head, 1);
			goto out;
		}
	}
	/* Report and clear a pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		socket_unlock(head, 1);
		goto out;
	}

	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	so_acquire_accept_list(head, NULL);
	/* The queue is re-checked once the accept list is held; start over if it is now empty. */
	if (TAILQ_EMPTY(&head->so_comp)) {
		so_release_accept_list(head);
		goto check_again;
	}

	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	so->so_head = NULL;
	so->so_state &= ~SS_COMP;
	head->so_qlen--;
	so_release_accept_list(head);

	/* unlock head to avoid deadlock with select, keep a ref on head */
	socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Pass the pre-accepted socket to the MAC framework. This is
	 * cheaper than allocating a file descriptor for the socket,
	 * calling the protocol accept callback, and possibly freeing
	 * the file descriptor should the MAC check fails.
	 */
	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Drop reference on listening socket */
		sodereference(head);
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
		/* Drop reference on listening socket */
		sodereference(head);
		/* Propagate socket filter's error code to the caller */
		goto out;
	}

	/*
	 * Save the listening descriptor's flags before falloc() reuses fp
	 * for the new descriptor; the new file inherits them below.
	 */
	fflag = fp->f_flag;
	error = falloc(p, &fp, &newfd, vfs_context_current());
	if (error) {
		/*
		 * Probably ran out of file descriptors.
		 *
		 * <rdar://problem/8554930>
		 * Don't put this back on the socket like we used to, that
		 * just causes the client to spin. Drop the socket.
		 */
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		sodereference(head);
		goto out;
	}
	/* From here on fp refers to the NEW descriptor, not the listener. */
	*retval = newfd;
	fp->f_flag = fflag;
	fp->f_ops = &socketops;
	fp_set_data(fp, so);

	/*
	 * Re-lock head (NOTE(review): the 0 presumably means "without
	 * taking another reference", mirroring the socket_unlock(head, 0)
	 * above -- confirm), and lock the new socket too when the protocol
	 * uses per-socket locks.
	 */
	socket_lock(head, 0);
	if (dosocklock) {
		socket_lock(so, 1);
	}

	/* Sync socket non-blocking/async state with file flags */
	if (fp->f_flag & FNONBLOCK) {
		so->so_state |= SS_NBIO;
	} else {
		so->so_state &= ~SS_NBIO;
	}

	if (fp->f_flag & FASYNC) {
		so->so_state |= SS_ASYNC;
		so->so_rcv.sb_flags |= SB_ASYNC;
		so->so_snd.sb_flags |= SB_ASYNC;
	} else {
		so->so_state &= ~SS_ASYNC;
		so->so_rcv.sb_flags &= ~SB_ASYNC;
		so->so_snd.sb_flags &= ~SB_ASYNC;
	}

	/* The return value is deliberately ignored; only sa is examined. */
	(void) soacceptlock(so, &sa, 0);
	socket_unlock(head, 1);
	if (sa == NULL) {
		/* No address available: report a zero length if one was asked for. */
		namelen = 0;
		if (uap->name) {
			goto gotnoname;
		}
		error = 0;
		goto releasefd;
	}
	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

	if (uap->name) {
		socklen_t       sa_len;

		/* save sa_len before it is destroyed */
		sa_len = sa->sa_len;
		namelen = MIN(namelen, sa_len);
		error = copyout(sa, uap->name, namelen);
		if (!error) {
			/* return the actual, untruncated address length */
			namelen = sa_len;
		}
		/*
		 * NOTE(review): the assignment below overwrites the result of
		 * the address copyout above, so a failure to copy the address
		 * is not reported when the length copyout succeeds.
		 */
gotnoname:
		error = copyout((caddr_t)&namelen, uap->anamelen,
		    sizeof(socklen_t));
	}
	/* Reached with sa == NULL on the gotnoname path. */
	free_sockaddr(sa);

releasefd:
	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}

	if (dosocklock) {
		socket_unlock(so, 1);
	}

	/*
	 * Release the new descriptor into the process's table.  This runs
	 * even when error is non-zero after the copyout above.
	 */
	proc_fdlock(p);
	procfdtbl_releasefd(p, newfd, NULL);
	fp_drop(p, newfd, fp, 1);
	proc_fdunlock(p);

out:
	/* Presumably balances the fp_get_ftype() on the listening descriptor. */
	file_drop(fd);

	if (error == 0 && ENTR_SHOULDTRACE) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
	}
	return error;
}
659 
660 int
accept(struct proc * p,struct accept_args * uap,int32_t * retval)661 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
662 {
663 	__pthread_testcancel(1);
664 	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
665 	           retval);
666 }
667 
668 /*
669  * Returns:	0			Success
670  *		EBADF			Bad file descriptor
671  *		EALREADY		Connection already in progress
672  *		EINPROGRESS		Operation in progress
673  *		ECONNABORTED		Connection aborted
674  *		EINTR			Interrupted function
675  *		EACCES			Mandatory Access Control failure
676  *	file_socket:ENOTSOCK
677  *	file_socket:EBADF
678  *	getsockaddr:ENAMETOOLONG	Filename too long
679  *	getsockaddr:EINVAL		Invalid argument
680  *	getsockaddr:ENOMEM		Not enough space
681  *	getsockaddr:EFAULT		Bad address
682  *	soconnectlock:EOPNOTSUPP
683  *	soconnectlock:EISCONN
684  *	soconnectlock:???		[depends on protocol, filters]
685  *	msleep:EINTR
686  *
687  * Imputed:	so_error		error may be set from so_error, which
688  *					may have been set by soconnectlock.
689  */
690 /* ARGSUSED */
691 int
connect(struct proc * p,struct connect_args * uap,int32_t * retval)692 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
693 {
694 	__pthread_testcancel(1);
695 	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
696 	           retval);
697 }
698 
699 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_t * retval)700 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
701 {
702 #pragma unused(p, retval)
703 	struct socket *so;
704 	struct sockaddr_storage ss;
705 	struct sockaddr *sa = NULL;
706 	int error;
707 	int fd = uap->s;
708 	boolean_t dgram;
709 
710 	AUDIT_ARG(fd, uap->s);
711 	error = file_socket(fd, &so);
712 	if (error != 0) {
713 		return error;
714 	}
715 	if (so == NULL) {
716 		error = EBADF;
717 		goto out;
718 	}
719 
720 	/*
721 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
722 	 * if this is a datagram socket; translate for other types.
723 	 */
724 	dgram = (so->so_type == SOCK_DGRAM);
725 
726 	/* Get socket address now before we obtain socket lock */
727 	if (uap->namelen > sizeof(ss)) {
728 		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
729 	} else {
730 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
731 		if (error == 0) {
732 			sa = (struct sockaddr *)&ss;
733 		}
734 	}
735 	if (error != 0) {
736 		goto out;
737 	}
738 
739 	error = connectit(so, sa);
740 
741 	if (sa != NULL && sa != SA(&ss)) {
742 		free_sockaddr(sa);
743 	}
744 	if (error == ERESTART) {
745 		error = EINTR;
746 	}
747 out:
748 	file_drop(fd);
749 	return error;
750 }
751 
752 static int
connectx_nocancel(struct proc * p,struct connectx_args * uap,int * retval)753 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
754 {
755 #pragma unused(p, retval)
756 	struct sockaddr_storage ss, sd;
757 	struct sockaddr *src = NULL, *dst = NULL;
758 	struct socket *so;
759 	int error, error1, fd = uap->socket;
760 	boolean_t dgram;
761 	sae_connid_t cid = SAE_CONNID_ANY;
762 	struct user32_sa_endpoints ep32;
763 	struct user64_sa_endpoints ep64;
764 	struct user_sa_endpoints ep;
765 	user_ssize_t bytes_written = 0;
766 	struct user_iovec *iovp;
767 	uio_t auio = NULL;
768 
769 	AUDIT_ARG(fd, uap->socket);
770 	error = file_socket(fd, &so);
771 	if (error != 0) {
772 		return error;
773 	}
774 	if (so == NULL) {
775 		error = EBADF;
776 		goto out;
777 	}
778 
779 	if (uap->endpoints == USER_ADDR_NULL) {
780 		error = EINVAL;
781 		goto out;
782 	}
783 
784 	if (IS_64BIT_PROCESS(p)) {
785 		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
786 		if (error != 0) {
787 			goto out;
788 		}
789 
790 		ep.sae_srcif = ep64.sae_srcif;
791 		ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
792 		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
793 		ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
794 		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
795 	} else {
796 		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
797 		if (error != 0) {
798 			goto out;
799 		}
800 
801 		ep.sae_srcif = ep32.sae_srcif;
802 		ep.sae_srcaddr = ep32.sae_srcaddr;
803 		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
804 		ep.sae_dstaddr = ep32.sae_dstaddr;
805 		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
806 	}
807 
808 	/*
809 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
810 	 * if this is a datagram socket; translate for other types.
811 	 */
812 	dgram = (so->so_type == SOCK_DGRAM);
813 
814 	/* Get socket address now before we obtain socket lock */
815 	if (ep.sae_srcaddr != USER_ADDR_NULL) {
816 		if (ep.sae_srcaddrlen > sizeof(ss)) {
817 			error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
818 		} else {
819 			error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
820 			if (error == 0) {
821 				src = (struct sockaddr *)&ss;
822 			}
823 		}
824 
825 		if (error) {
826 			goto out;
827 		}
828 	}
829 
830 	if (ep.sae_dstaddr == USER_ADDR_NULL) {
831 		error = EINVAL;
832 		goto out;
833 	}
834 
835 	/* Get socket address now before we obtain socket lock */
836 	if (ep.sae_dstaddrlen > sizeof(sd)) {
837 		error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
838 	} else {
839 		error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
840 		if (error == 0) {
841 			dst = (struct sockaddr *)&sd;
842 		}
843 	}
844 
845 	if (error) {
846 		goto out;
847 	}
848 
849 	VERIFY(dst != NULL);
850 
851 	if (uap->iov != USER_ADDR_NULL) {
852 		/* Verify range before calling uio_create() */
853 		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
854 			error = EINVAL;
855 			goto out;
856 		}
857 
858 		if (uap->len == USER_ADDR_NULL) {
859 			error = EINVAL;
860 			goto out;
861 		}
862 
863 		/* allocate a uio to hold the number of iovecs passed */
864 		auio = uio_create(uap->iovcnt, 0,
865 		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
866 		    UIO_WRITE);
867 
868 		if (auio == NULL) {
869 			error = ENOMEM;
870 			goto out;
871 		}
872 
873 		/*
874 		 * get location of iovecs within the uio.
875 		 * then copyin the iovecs from user space.
876 		 */
877 		iovp = uio_iovsaddr(auio);
878 		if (iovp == NULL) {
879 			error = ENOMEM;
880 			goto out;
881 		}
882 		error = copyin_user_iovec_array(uap->iov,
883 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
884 		    uap->iovcnt, iovp);
885 		if (error != 0) {
886 			goto out;
887 		}
888 
889 		/* finish setup of uio_t */
890 		error = uio_calculateresid(auio);
891 		if (error != 0) {
892 			goto out;
893 		}
894 	}
895 
896 	error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
897 	    &cid, auio, uap->flags, &bytes_written);
898 	if (error == ERESTART) {
899 		error = EINTR;
900 	}
901 
902 	if (uap->len != USER_ADDR_NULL) {
903 		error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
904 		/* give precedence to connectitx errors */
905 		if ((error1 != 0) && (error == 0)) {
906 			error = error1;
907 		}
908 	}
909 
910 	if (uap->connid != USER_ADDR_NULL) {
911 		error1 = copyout(&cid, uap->connid, sizeof(cid));
912 		/* give precedence to connectitx errors */
913 		if ((error1 != 0) && (error == 0)) {
914 			error = error1;
915 		}
916 	}
917 out:
918 	file_drop(fd);
919 	if (auio != NULL) {
920 		uio_free(auio);
921 	}
922 	if (src != NULL && src != SA(&ss)) {
923 		free_sockaddr(src);
924 	}
925 	if (dst != NULL && dst != SA(&sd)) {
926 		free_sockaddr(dst);
927 	}
928 	return error;
929 }
930 
/*
 * connectx system call: a cancellation point followed by
 * connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int rc;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	rc = connectx_nocancel(p, uap, retval);
	return rc;
}
941 
942 static int
connectit(struct socket * so,struct sockaddr * sa)943 connectit(struct socket *so, struct sockaddr *sa)
944 {
945 	int error;
946 
947 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
948 #if CONFIG_MACF_SOCKET_SUBSET
949 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
950 		return error;
951 	}
952 #endif /* MAC_SOCKET_SUBSET */
953 
954 	socket_lock(so, 1);
955 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
956 		error = EALREADY;
957 		goto out;
958 	}
959 	error = soconnectlock(so, sa, 0);
960 	if (error != 0) {
961 		goto out;
962 	}
963 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
964 		error = EINPROGRESS;
965 		goto out;
966 	}
967 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
968 		lck_mtx_t *mutex_held;
969 
970 		if (so->so_proto->pr_getlock != NULL) {
971 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
972 		} else {
973 			mutex_held = so->so_proto->pr_domain->dom_mtx;
974 		}
975 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
976 		    PSOCK | PCATCH, __func__, 0);
977 		if (so->so_state & SS_DRAINING) {
978 			error = ECONNABORTED;
979 		}
980 		if (error != 0) {
981 			break;
982 		}
983 	}
984 	if (error == 0) {
985 		error = so->so_error;
986 		so->so_error = 0;
987 	}
988 out:
989 	socket_unlock(so, 1);
990 	return error;
991 }
992 
993 static int
connectitx(struct socket * so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)994 connectitx(struct socket *so, struct sockaddr *src,
995     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
996     sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
997     user_ssize_t *bytes_written)
998 {
999 	int error;
1000 
1001 	VERIFY(dst != NULL);
1002 
1003 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1004 #if CONFIG_MACF_SOCKET_SUBSET
1005 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1006 		return error;
1007 	}
1008 
1009 	if (auio != NULL) {
1010 		if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1011 			return error;
1012 		}
1013 	}
1014 #endif /* MAC_SOCKET_SUBSET */
1015 
1016 	socket_lock(so, 1);
1017 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1018 		error = EALREADY;
1019 		goto out;
1020 	}
1021 
1022 	error = soconnectxlocked(so, src, dst, p, ifscope,
1023 	    aid, pcid, flags, NULL, 0, auio, bytes_written);
1024 	if (error != 0) {
1025 		goto out;
1026 	}
1027 	/*
1028 	 * If, after the call to soconnectxlocked the flag is still set (in case
1029 	 * data has been queued and the connect() has actually been triggered,
1030 	 * it will have been unset by the transport), we exit immediately. There
1031 	 * is no reason to wait on any event.
1032 	 */
1033 	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1034 		error = 0;
1035 		goto out;
1036 	}
1037 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1038 		error = EINPROGRESS;
1039 		goto out;
1040 	}
1041 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1042 		lck_mtx_t *mutex_held;
1043 
1044 		if (so->so_proto->pr_getlock != NULL) {
1045 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1046 		} else {
1047 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1048 		}
1049 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1050 		    PSOCK | PCATCH, __func__, 0);
1051 		if (so->so_state & SS_DRAINING) {
1052 			error = ECONNABORTED;
1053 		}
1054 		if (error != 0) {
1055 			break;
1056 		}
1057 	}
1058 	if (error == 0) {
1059 		error = so->so_error;
1060 		so->so_error = 0;
1061 	}
1062 out:
1063 	socket_unlock(so, 1);
1064 	return error;
1065 }
1066 
/*
 * peeloff: placeholder system call.
 *
 * Does no socket work; after the cancellation check it unconditionally
 * reports success and never touches its arguments.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return 0;
}
1078 
/*
 * disconnectx: cancellable entry point for disconnectx_nocancel().
 *
 * Returns whatever disconnectx_nocancel() returns.
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1089 
1090 static int
disconnectx_nocancel(struct proc * p,struct disconnectx_args * uap,int * retval)1091 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1092 {
1093 #pragma unused(p, retval)
1094 	struct socket *so;
1095 	int fd = uap->s;
1096 	int error;
1097 
1098 	error = file_socket(fd, &so);
1099 	if (error != 0) {
1100 		return error;
1101 	}
1102 	if (so == NULL) {
1103 		error = EBADF;
1104 		goto out;
1105 	}
1106 
1107 	error = sodisconnectx(so, uap->aid, uap->cid);
1108 out:
1109 	file_drop(fd);
1110 	return error;
1111 }
1112 
1113 /*
1114  * Returns:	0			Success
1115  *	socreate:EAFNOSUPPORT
1116  *	socreate:EPROTOTYPE
1117  *	socreate:EPROTONOSUPPORT
1118  *	socreate:ENOBUFS
1119  *	socreate:ENOMEM
1120  *	socreate:EISCONN
1121  *	socreate:???			[other protocol families, IPSEC]
1122  *	falloc:ENFILE
1123  *	falloc:EMFILE
1124  *	falloc:ENOMEM
1125  *	copyout:EFAULT
1126  *	soconnect2:EINVAL
1127  *	soconnect2:EPROTOTYPE
1128  *	soconnect2:???			[other protocol families[
1129  */
1130 int
socketpair(struct proc * p,struct socketpair_args * uap,__unused int32_t * retval)1131 socketpair(struct proc *p, struct socketpair_args *uap,
1132     __unused int32_t *retval)
1133 {
1134 	struct fileproc *fp1, *fp2;
1135 	struct socket *so1, *so2;
1136 	int fd, error, sv[2];
1137 
1138 	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1139 	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1140 	if (error) {
1141 		return error;
1142 	}
1143 	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1144 	if (error) {
1145 		goto free1;
1146 	}
1147 
1148 	error = falloc(p, &fp1, &fd, vfs_context_current());
1149 	if (error) {
1150 		goto free2;
1151 	}
1152 	fp1->f_flag = FREAD | FWRITE;
1153 	fp1->f_ops = &socketops;
1154 	fp_set_data(fp1, so1);
1155 	sv[0] = fd;
1156 
1157 	error = falloc(p, &fp2, &fd, vfs_context_current());
1158 	if (error) {
1159 		goto free3;
1160 	}
1161 	fp2->f_flag = FREAD | FWRITE;
1162 	fp2->f_ops = &socketops;
1163 	fp_set_data(fp2, so2);
1164 	sv[1] = fd;
1165 
1166 	error = soconnect2(so1, so2);
1167 	if (error) {
1168 		goto free4;
1169 	}
1170 	if (uap->type == SOCK_DGRAM) {
1171 		/*
1172 		 * Datagram socket connection is asymmetric.
1173 		 */
1174 		error = soconnect2(so2, so1);
1175 		if (error) {
1176 			goto free4;
1177 		}
1178 	}
1179 
1180 	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1181 		goto free4;
1182 	}
1183 
1184 	proc_fdlock(p);
1185 	procfdtbl_releasefd(p, sv[0], NULL);
1186 	procfdtbl_releasefd(p, sv[1], NULL);
1187 	fp_drop(p, sv[0], fp1, 1);
1188 	fp_drop(p, sv[1], fp2, 1);
1189 	proc_fdunlock(p);
1190 
1191 	return 0;
1192 free4:
1193 	fp_free(p, sv[1], fp2);
1194 free3:
1195 	fp_free(p, sv[0], fp1);
1196 free2:
1197 	(void) soclose(so2);
1198 free1:
1199 	(void) soclose(so1);
1200 	return error;
1201 }
1202 
1203 /*
1204  * Returns:	0			Success
1205  *		EINVAL
1206  *		ENOBUFS
1207  *		EBADF
1208  *		EPIPE
1209  *		EACCES			Mandatory Access Control failure
1210  *	file_socket:ENOTSOCK
1211  *	file_socket:EBADF
1212  *	getsockaddr:ENAMETOOLONG	Filename too long
1213  *	getsockaddr:EINVAL		Invalid argument
1214  *	getsockaddr:ENOMEM		Not enough space
1215  *	getsockaddr:EFAULT		Bad address
1216  *	<pru_sosend>:EACCES[TCP]
1217  *	<pru_sosend>:EADDRINUSE[TCP]
1218  *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1219  *	<pru_sosend>:EAFNOSUPPORT[TCP]
1220  *	<pru_sosend>:EAGAIN[TCP]
1221  *	<pru_sosend>:EBADF
1222  *	<pru_sosend>:ECONNRESET[TCP]
1223  *	<pru_sosend>:EFAULT
1224  *	<pru_sosend>:EHOSTUNREACH[TCP]
1225  *	<pru_sosend>:EINTR
1226  *	<pru_sosend>:EINVAL
1227  *	<pru_sosend>:EISCONN[AF_INET]
1228  *	<pru_sosend>:EMSGSIZE[TCP]
1229  *	<pru_sosend>:ENETDOWN[TCP]
1230  *	<pru_sosend>:ENETUNREACH[TCP]
1231  *	<pru_sosend>:ENOBUFS
1232  *	<pru_sosend>:ENOMEM[TCP]
1233  *	<pru_sosend>:ENOTCONN[AF_INET]
1234  *	<pru_sosend>:EOPNOTSUPP
1235  *	<pru_sosend>:EPERM[TCP]
1236  *	<pru_sosend>:EPIPE
1237  *	<pru_sosend>:EWOULDBLOCK
1238  *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1239  *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1240  *	<pru_sosend>:???		[value from so_error]
1241  *	sockargs:???
1242  */
1243 static int
sendit(struct proc * p,struct socket * so,struct user_msghdr * mp,uio_t uiop,int flags,int32_t * retval)1244 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1245     int flags, int32_t *retval)
1246 {
1247 	struct mbuf *control = NULL;
1248 	struct sockaddr_storage ss;
1249 	struct sockaddr *to = NULL;
1250 	boolean_t want_free = TRUE;
1251 	int error;
1252 	user_ssize_t len;
1253 
1254 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1255 
1256 	if (mp->msg_name != USER_ADDR_NULL) {
1257 		if (mp->msg_namelen > sizeof(ss)) {
1258 			error = getsockaddr(so, &to, mp->msg_name,
1259 			    mp->msg_namelen, TRUE);
1260 		} else {
1261 			error = getsockaddr_s(so, &ss, mp->msg_name,
1262 			    mp->msg_namelen, TRUE);
1263 			if (error == 0) {
1264 				to = (struct sockaddr *)&ss;
1265 				want_free = FALSE;
1266 			}
1267 		}
1268 		if (error != 0) {
1269 			goto out;
1270 		}
1271 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1272 	}
1273 	if (mp->msg_control != USER_ADDR_NULL) {
1274 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1275 			error = EINVAL;
1276 			goto bad;
1277 		}
1278 		error = sockargs(&control, mp->msg_control,
1279 		    mp->msg_controllen, MT_CONTROL);
1280 		if (error != 0) {
1281 			goto bad;
1282 		}
1283 	}
1284 
1285 #if CONFIG_MACF_SOCKET_SUBSET
1286 	/*
1287 	 * We check the state without holding the socket lock;
1288 	 * if a race condition occurs, it would simply result
1289 	 * in an extra call to the MAC check function.
1290 	 */
1291 	if (to != NULL &&
1292 	    !(so->so_state & SS_DEFUNCT) &&
1293 	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1294 		if (control != NULL) {
1295 			m_freem(control);
1296 		}
1297 
1298 		goto bad;
1299 	}
1300 #endif /* MAC_SOCKET_SUBSET */
1301 
1302 	len = uio_resid(uiop);
1303 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1304 	    control, flags);
1305 	if (error != 0) {
1306 		if (uio_resid(uiop) != len && (error == ERESTART ||
1307 		    error == EINTR || error == EWOULDBLOCK)) {
1308 			error = 0;
1309 		}
1310 		/* Generation of SIGPIPE can be controlled per socket */
1311 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1312 		    !(flags & MSG_NOSIGNAL)) {
1313 			psignal(p, SIGPIPE);
1314 		}
1315 	}
1316 	if (error == 0) {
1317 		*retval = (int)(len - uio_resid(uiop));
1318 	}
1319 bad:
1320 	if (want_free) {
1321 		free_sockaddr(to);
1322 	}
1323 out:
1324 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1325 
1326 	return error;
1327 }
1328 
1329 /*
1330  * Returns:	0			Success
1331  *		ENOMEM
1332  *	sendit:???			[see sendit definition in this file]
1333  *	write:???			[4056224: applicable for pipes]
1334  */
1335 int
sendto(struct proc * p,struct sendto_args * uap,int32_t * retval)1336 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1337 {
1338 	__pthread_testcancel(1);
1339 	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1340 }
1341 
1342 int
sendto_nocancel(struct proc * p,struct sendto_nocancel_args * uap,int32_t * retval)1343 sendto_nocancel(struct proc *p,
1344     struct sendto_nocancel_args *uap,
1345     int32_t *retval)
1346 {
1347 	struct user_msghdr msg;
1348 	int error;
1349 	uio_t auio = NULL;
1350 	struct socket *so;
1351 
1352 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1353 	AUDIT_ARG(fd, uap->s);
1354 
1355 	if (uap->flags & MSG_SKIPCFIL) {
1356 		error = EPERM;
1357 		goto done;
1358 	}
1359 
1360 	if (uap->len > LONG_MAX) {
1361 		error = EINVAL;
1362 		goto done;
1363 	}
1364 
1365 	auio = uio_create(1, 0,
1366 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1367 	    UIO_WRITE);
1368 	if (auio == NULL) {
1369 		error = ENOMEM;
1370 		goto done;
1371 	}
1372 	uio_addiov(auio, uap->buf, uap->len);
1373 
1374 	msg.msg_name = uap->to;
1375 	msg.msg_namelen = uap->tolen;
1376 	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1377 	msg.msg_iov = 0;
1378 	msg.msg_iovlen = 0;
1379 	msg.msg_control = 0;
1380 	msg.msg_flags = 0;
1381 
1382 	error = file_socket(uap->s, &so);
1383 	if (error) {
1384 		goto done;
1385 	}
1386 
1387 	if (so == NULL) {
1388 		error = EBADF;
1389 	} else {
1390 		error = sendit(p, so, &msg, auio, uap->flags, retval);
1391 	}
1392 
1393 	file_drop(uap->s);
1394 done:
1395 	if (auio != NULL) {
1396 		uio_free(auio);
1397 	}
1398 
1399 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1400 
1401 	return error;
1402 }
1403 
1404 /*
1405  * Returns:	0			Success
1406  *		ENOBUFS
1407  *	copyin:EFAULT
1408  *	sendit:???			[see sendit definition in this file]
1409  */
1410 int
sendmsg(struct proc * p,struct sendmsg_args * uap,int32_t * retval)1411 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1412 {
1413 	__pthread_testcancel(1);
1414 	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1415 	           retval);
1416 }
1417 
1418 int
sendmsg_nocancel(struct proc * p,struct sendmsg_nocancel_args * uap,int32_t * retval)1419 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1420     int32_t *retval)
1421 {
1422 	struct user32_msghdr msg32;
1423 	struct user64_msghdr msg64;
1424 	struct user_msghdr user_msg;
1425 	caddr_t msghdrp;
1426 	int     size_of_msghdr;
1427 	int error;
1428 	uio_t auio = NULL;
1429 	struct user_iovec *iovp;
1430 	struct socket *so;
1431 
1432 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1433 
1434 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1435 	AUDIT_ARG(fd, uap->s);
1436 
1437 	if (uap->flags & MSG_SKIPCFIL) {
1438 		error = EPERM;
1439 		goto done;
1440 	}
1441 
1442 	if (is_p_64bit_process) {
1443 		msghdrp = (caddr_t)&msg64;
1444 		size_of_msghdr = sizeof(msg64);
1445 	} else {
1446 		msghdrp = (caddr_t)&msg32;
1447 		size_of_msghdr = sizeof(msg32);
1448 	}
1449 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1450 	if (error) {
1451 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1452 		return error;
1453 	}
1454 
1455 	if (is_p_64bit_process) {
1456 		user_msg.msg_flags = msg64.msg_flags;
1457 		user_msg.msg_controllen = msg64.msg_controllen;
1458 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
1459 		user_msg.msg_iovlen = msg64.msg_iovlen;
1460 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1461 		user_msg.msg_namelen = msg64.msg_namelen;
1462 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
1463 	} else {
1464 		user_msg.msg_flags = msg32.msg_flags;
1465 		user_msg.msg_controllen = msg32.msg_controllen;
1466 		user_msg.msg_control = msg32.msg_control;
1467 		user_msg.msg_iovlen = msg32.msg_iovlen;
1468 		user_msg.msg_iov = msg32.msg_iov;
1469 		user_msg.msg_namelen = msg32.msg_namelen;
1470 		user_msg.msg_name = msg32.msg_name;
1471 	}
1472 
1473 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1474 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1475 		    0, 0, 0, 0);
1476 		return EMSGSIZE;
1477 	}
1478 
1479 	/* allocate a uio large enough to hold the number of iovecs passed */
1480 	auio = uio_create(user_msg.msg_iovlen, 0,
1481 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1482 	    UIO_WRITE);
1483 	if (auio == NULL) {
1484 		error = ENOBUFS;
1485 		goto done;
1486 	}
1487 
1488 	if (user_msg.msg_iovlen) {
1489 		/*
1490 		 * get location of iovecs within the uio.
1491 		 * then copyin the iovecs from user space.
1492 		 */
1493 		iovp = uio_iovsaddr(auio);
1494 		if (iovp == NULL) {
1495 			error = ENOBUFS;
1496 			goto done;
1497 		}
1498 		error = copyin_user_iovec_array(user_msg.msg_iov,
1499 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1500 		    user_msg.msg_iovlen, iovp);
1501 		if (error) {
1502 			goto done;
1503 		}
1504 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1505 
1506 		/* finish setup of uio_t */
1507 		error = uio_calculateresid(auio);
1508 		if (error) {
1509 			goto done;
1510 		}
1511 	} else {
1512 		user_msg.msg_iov = 0;
1513 	}
1514 
1515 	/* msg_flags is ignored for send */
1516 	user_msg.msg_flags = 0;
1517 
1518 	error = file_socket(uap->s, &so);
1519 	if (error) {
1520 		goto done;
1521 	}
1522 	if (so == NULL) {
1523 		error = EBADF;
1524 	} else {
1525 		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1526 	}
1527 	file_drop(uap->s);
1528 done:
1529 	if (auio != NULL) {
1530 		uio_free(auio);
1531 	}
1532 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1533 
1534 	return error;
1535 }
1536 
1537 int
sendmsg_x(struct proc * p,struct sendmsg_x_args * uap,user_ssize_t * retval)1538 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1539 {
1540 	int error = 0;
1541 	struct user_msghdr_x *user_msg_x = NULL;
1542 	struct uio **uiop = NULL;
1543 	struct socket *so;
1544 	u_int i;
1545 	struct sockaddr *to = NULL;
1546 	user_ssize_t len_before = 0, len_after;
1547 	int need_drop = 0;
1548 	size_t size_of_msghdr;
1549 	void *umsgp = NULL;
1550 	u_int uiocnt;
1551 	int has_addr_or_ctl = 0;
1552 
1553 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1554 
1555 	size_of_msghdr = IS_64BIT_PROCESS(p) ?
1556 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1557 
1558 	if (uap->flags & MSG_SKIPCFIL) {
1559 		error = EPERM;
1560 		goto out;
1561 	}
1562 
1563 	error = file_socket(uap->s, &so);
1564 	if (error) {
1565 		goto out;
1566 	}
1567 	need_drop = 1;
1568 	if (so == NULL) {
1569 		error = EBADF;
1570 		goto out;
1571 	}
1572 
1573 	/*
1574 	 * Input parameter range check
1575 	 */
1576 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1577 		error = EINVAL;
1578 		goto out;
1579 	}
1580 	/*
1581 	 * Clip to max currently allowed
1582 	 */
1583 	if (uap->cnt > somaxsendmsgx) {
1584 		uap->cnt = somaxsendmsgx;
1585 	}
1586 
1587 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
1588 	    Z_WAITOK | Z_ZERO);
1589 	if (user_msg_x == NULL) {
1590 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
1591 		error = ENOMEM;
1592 		goto out;
1593 	}
1594 	uiop = kalloc_type(struct uio *, uap->cnt, Z_WAITOK | Z_ZERO);
1595 	if (uiop == NULL) {
1596 		DBG_PRINTF("%s uiop alloc failed\n", __func__);
1597 		error = ENOMEM;
1598 		goto out;
1599 	}
1600 
1601 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
1602 	if (umsgp == NULL) {
1603 		printf("%s user_msg_x alloc failed\n", __func__);
1604 		error = ENOMEM;
1605 		goto out;
1606 	}
1607 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1608 	if (error) {
1609 		DBG_PRINTF("%s copyin() failed\n", __func__);
1610 		goto out;
1611 	}
1612 	error = internalize_user_msghdr_array(umsgp,
1613 	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1614 	    UIO_WRITE, uap->cnt, user_msg_x, uiop);
1615 	if (error) {
1616 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1617 		goto out;
1618 	}
1619 	/*
1620 	 * Make sure the size of each message iovec and
1621 	 * the aggregate size of all the iovec is valid
1622 	 */
1623 	if (uio_array_is_valid(uiop, uap->cnt) == false) {
1624 		error = EINVAL;
1625 		goto out;
1626 	}
1627 
1628 	/*
1629 	 * Sanity check on passed arguments
1630 	 */
1631 	for (i = 0; i < uap->cnt; i++) {
1632 		struct user_msghdr_x *mp = user_msg_x + i;
1633 
1634 		/*
1635 		 * No flags on send message
1636 		 */
1637 		if (mp->msg_flags != 0) {
1638 			error = EINVAL;
1639 			goto out;
1640 		}
1641 		/*
1642 		 * No support for address or ancillary data (yet)
1643 		 */
1644 		if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1645 			has_addr_or_ctl = 1;
1646 		}
1647 
1648 		if (mp->msg_control != USER_ADDR_NULL ||
1649 		    mp->msg_controllen != 0) {
1650 			has_addr_or_ctl = 1;
1651 		}
1652 
1653 #if CONFIG_MACF_SOCKET_SUBSET
1654 		/*
1655 		 * We check the state without holding the socket lock;
1656 		 * if a race condition occurs, it would simply result
1657 		 * in an extra call to the MAC check function.
1658 		 *
1659 		 * Note: The following check is never true taken with the
1660 		 * current limitation that we do not accept to pass an address,
1661 		 * this is effectively placeholder code. If we add support for
1662 		 * addresses, we will have to check every address.
1663 		 */
1664 		if (to != NULL &&
1665 		    !(so->so_state & SS_DEFUNCT) &&
1666 		    (error = mac_socket_check_send(kauth_cred_get(), so, to))
1667 		    != 0) {
1668 			goto out;
1669 		}
1670 #endif /* MAC_SOCKET_SUBSET */
1671 	}
1672 
1673 	len_before = uio_array_resid(uiop, uap->cnt);
1674 
1675 	/*
1676 	 * Feed list of packets at once only for connected socket without
1677 	 * control message
1678 	 */
1679 	if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1680 	    pru_sosend_list_notsupp &&
1681 	    has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1682 		error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1683 		    uap->cnt, uap->flags);
1684 	} else {
1685 		for (i = 0; i < uap->cnt; i++) {
1686 			struct user_msghdr_x *mp = user_msg_x + i;
1687 			struct user_msghdr user_msg;
1688 			uio_t auio = uiop[i];
1689 			int32_t tmpval;
1690 
1691 			user_msg.msg_flags = mp->msg_flags;
1692 			user_msg.msg_controllen = mp->msg_controllen;
1693 			user_msg.msg_control = mp->msg_control;
1694 			user_msg.msg_iovlen = mp->msg_iovlen;
1695 			user_msg.msg_iov = mp->msg_iov;
1696 			user_msg.msg_namelen = mp->msg_namelen;
1697 			user_msg.msg_name = mp->msg_name;
1698 
1699 			error = sendit(p, so, &user_msg, auio, uap->flags,
1700 			    &tmpval);
1701 			if (error != 0) {
1702 				break;
1703 			}
1704 		}
1705 	}
1706 	len_after = uio_array_resid(uiop, uap->cnt);
1707 
1708 	VERIFY(len_after <= len_before);
1709 
1710 	if (error != 0) {
1711 		if (len_after != len_before && (error == ERESTART ||
1712 		    error == EINTR || error == EWOULDBLOCK ||
1713 		    error == ENOBUFS)) {
1714 			error = 0;
1715 		}
1716 		/* Generation of SIGPIPE can be controlled per socket */
1717 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1718 		    !(uap->flags & MSG_NOSIGNAL)) {
1719 			psignal(p, SIGPIPE);
1720 		}
1721 	}
1722 	if (error == 0) {
1723 		uiocnt = externalize_user_msghdr_array(umsgp,
1724 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1725 		    UIO_WRITE, uap->cnt, user_msg_x, uiop);
1726 
1727 		*retval = (int)(uiocnt);
1728 	}
1729 out:
1730 	if (need_drop) {
1731 		file_drop(uap->s);
1732 	}
1733 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
1734 	if (uiop != NULL) {
1735 		free_uio_array(uiop, uap->cnt);
1736 		kfree_type(struct uio *, uap->cnt, uiop);
1737 	}
1738 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
1739 
1740 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1741 
1742 	return error;
1743 }
1744 
1745 
1746 static int
copyout_sa(struct sockaddr * fromsa,user_addr_t name,socklen_t * namelen)1747 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1748 {
1749 	int error = 0;
1750 	socklen_t sa_len = 0;
1751 	ssize_t len;
1752 
1753 	len = *namelen;
1754 	if (len <= 0 || fromsa == 0) {
1755 		len = 0;
1756 	} else {
1757 #ifndef MIN
1758 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1759 #endif
1760 		sa_len = fromsa->sa_len;
1761 		len = MIN((unsigned int)len, sa_len);
1762 		error = copyout(fromsa, name, (unsigned)len);
1763 		if (error) {
1764 			goto out;
1765 		}
1766 	}
1767 	*namelen = sa_len;
1768 out:
1769 	return 0;
1770 }
1771 
1772 static int
copyout_control(struct proc * p,struct mbuf * m,user_addr_t control,socklen_t * controllen,int * flags,struct socket * so)1773 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1774     socklen_t *controllen, int *flags, struct socket *so)
1775 {
1776 	int error = 0;
1777 	socklen_t len;
1778 	user_addr_t ctlbuf;
1779 	struct inpcb *inp = NULL;
1780 	bool want_pktinfo = false;
1781 	bool seen_pktinfo = false;
1782 
1783 	if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
1784 		inp = sotoinpcb(so);
1785 		want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
1786 	}
1787 
1788 	len = *controllen;
1789 	*controllen = 0;
1790 	ctlbuf = control;
1791 
1792 	while (m && len > 0) {
1793 		socklen_t tocopy;
1794 		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1795 		socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
1796 		socklen_t buflen = m->m_len;
1797 
1798 		while (buflen > 0 && len > 0) {
1799 			/*
1800 			 * SCM_TIMESTAMP hack because  struct timeval has a
1801 			 * different size for 32 bits and 64 bits processes
1802 			 */
1803 			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1804 				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1805 				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1806 				socklen_t tmp_space;
1807 				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1808 
1809 				tmp_cp->cmsg_level = SOL_SOCKET;
1810 				tmp_cp->cmsg_type = SCM_TIMESTAMP;
1811 
1812 				if (proc_is64bit(p)) {
1813 					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1814 
1815 					os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1816 					os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1817 
1818 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1819 					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1820 				} else {
1821 					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1822 
1823 					tv32->tv_sec = (user32_time_t)tv->tv_sec;
1824 					tv32->tv_usec = tv->tv_usec;
1825 
1826 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1827 					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1828 				}
1829 				if (len >= tmp_space) {
1830 					tocopy = tmp_space;
1831 				} else {
1832 					*flags |= MSG_CTRUNC;
1833 					tocopy = len;
1834 				}
1835 				error = copyout(tmp_buffer, ctlbuf, tocopy);
1836 				if (error) {
1837 					goto out;
1838 				}
1839 			} else {
1840 				/* If socket has flow tracking and socket did not request address, ignore it */
1841 				if (SOFLOW_ENABLED(so) &&
1842 				    ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
1843 				    !(inp->inp_flags & INP_RECVDSTADDR)) ||
1844 				    (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
1845 				    !(inp->inp_flags & IN6P_PKTINFO)))) {
1846 					tocopy = 0;
1847 				} else {
1848 					if (cp_size > buflen) {
1849 						panic("cp_size > buflen, something"
1850 						    "wrong with alignment!");
1851 					}
1852 					if (len >= cp_size) {
1853 						tocopy = cp_size;
1854 					} else {
1855 						*flags |= MSG_CTRUNC;
1856 						tocopy = len;
1857 					}
1858 					error = copyout((caddr_t) cp, ctlbuf, tocopy);
1859 					if (error) {
1860 						goto out;
1861 					}
1862 					if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
1863 					    (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
1864 						seen_pktinfo = true;
1865 					}
1866 				}
1867 			}
1868 
1869 			ctlbuf += tocopy;
1870 			len -= tocopy;
1871 
1872 			buflen -= cp_size;
1873 			cp = (struct cmsghdr *)(void *)
1874 			    ((unsigned char *) cp + cp_size);
1875 			cp_size = CMSG_ALIGN(cp->cmsg_len);
1876 		}
1877 
1878 		m = m->m_next;
1879 	}
1880 	*controllen = (socklen_t)(ctlbuf - control);
1881 out:
1882 	if (want_pktinfo && !seen_pktinfo) {
1883 		missingpktinfo += 1;
1884 #if (DEBUG || DEVELOPMENT)
1885 		char pname[MAXCOMLEN];
1886 		char local[MAX_IPv6_STR_LEN + 6];
1887 		char remote[MAX_IPv6_STR_LEN + 6];
1888 
1889 		proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
1890 		if (inp->inp_vflag & INP_IPV6) {
1891 			inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
1892 			inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
1893 		} else {
1894 			inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
1895 			inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
1896 		}
1897 
1898 		os_log(OS_LOG_DEFAULT,
1899 		    "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
1900 		    local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
1901 		    pname, so->last_pid, error);
1902 #endif /* (DEBUG || DEVELOPMENT) */
1903 	}
1904 	return error;
1905 }
1906 
1907 /*
1908  * Returns:	0			Success
1909  *		ENOTSOCK
1910  *		EINVAL
1911  *		EBADF
1912  *		EACCES			Mandatory Access Control failure
1913  *	copyout:EFAULT
1914  *	fp_lookup:EBADF
1915  *	<pru_soreceive>:ENOBUFS
1916  *	<pru_soreceive>:ENOTCONN
1917  *	<pru_soreceive>:EWOULDBLOCK
1918  *	<pru_soreceive>:EFAULT
1919  *	<pru_soreceive>:EINTR
1920  *	<pru_soreceive>:EBADF
1921  *	<pru_soreceive>:EINVAL
1922  *	<pru_soreceive>:EMSGSIZE
1923  *	<pru_soreceive>:???
1924  *
1925  * Notes:	Additional return values from calls through <pru_soreceive>
1926  *		depend on protocols other than TCP or AF_UNIX, which are
1927  *		documented above.
1928  */
1929 static int
recvit(struct proc * p,int s,struct user_msghdr * mp,uio_t uiop,user_addr_t namelenp,int32_t * retval)1930 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1931     user_addr_t namelenp, int32_t *retval)
1932 {
1933 	ssize_t len;
1934 	int error;
1935 	struct mbuf *control = 0;
1936 	struct socket *so;
1937 	struct sockaddr *fromsa = 0;
1938 	struct fileproc *fp;
1939 
1940 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1941 	if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
1942 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1943 		return error;
1944 	}
1945 	so = (struct socket *)fp_get_data(fp);
1946 
1947 #if CONFIG_MACF_SOCKET_SUBSET
1948 	/*
1949 	 * We check the state without holding the socket lock;
1950 	 * if a race condition occurs, it would simply result
1951 	 * in an extra call to the MAC check function.
1952 	 */
1953 	if (!(so->so_state & SS_DEFUNCT) &&
1954 	    !(so->so_state & SS_ISCONNECTED) &&
1955 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1956 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1957 		goto out1;
1958 	}
1959 #endif /* MAC_SOCKET_SUBSET */
1960 	if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
1961 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1962 		error = EINVAL;
1963 		goto out1;
1964 	}
1965 
1966 	len = uio_resid(uiop);
1967 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1968 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1969 	    &mp->msg_flags);
1970 	if (fromsa) {
1971 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1972 		    fromsa);
1973 	}
1974 	if (error) {
1975 		if (uio_resid(uiop) != len && (error == ERESTART ||
1976 		    error == EINTR || error == EWOULDBLOCK)) {
1977 			error = 0;
1978 		}
1979 	}
1980 	if (error) {
1981 		goto out;
1982 	}
1983 
1984 	*retval = (int32_t)(len - uio_resid(uiop));
1985 
1986 	if (mp->msg_name) {
1987 		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1988 		if (error) {
1989 			goto out;
1990 		}
1991 		/* return the actual, untruncated address length */
1992 		if (namelenp &&
1993 		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1994 		    sizeof(int)))) {
1995 			goto out;
1996 		}
1997 	}
1998 
1999 	if (mp->msg_control) {
2000 		error = copyout_control(p, control, mp->msg_control,
2001 		    &mp->msg_controllen, &mp->msg_flags, so);
2002 	}
2003 out:
2004 	free_sockaddr(fromsa);
2005 	if (control) {
2006 		m_freem(control);
2007 	}
2008 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2009 out1:
2010 	fp_drop(p, s, fp, 0);
2011 	return error;
2012 }
2013 
2014 /*
2015  * Returns:	0			Success
2016  *		ENOMEM
2017  *	copyin:EFAULT
2018  *	recvit:???
2019  *	read:???			[4056224: applicable for pipes]
2020  *
2021  * Notes:	The read entry point is only called as part of support for
2022  *		binary backward compatability; new code should use read
2023  *		instead of recv or recvfrom when attempting to read data
2024  *		from pipes.
2025  *
2026  *		For full documentation of the return codes from recvit, see
2027  *		the block header for the recvit function.
2028  */
2029 int
recvfrom(struct proc * p,struct recvfrom_args * uap,int32_t * retval)2030 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2031 {
2032 	__pthread_testcancel(1);
2033 	return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2034 	           retval);
2035 }
2036 
2037 int
recvfrom_nocancel(struct proc * p,struct recvfrom_nocancel_args * uap,int32_t * retval)2038 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2039     int32_t *retval)
2040 {
2041 	struct user_msghdr msg;
2042 	int error;
2043 	uio_t auio = NULL;
2044 
2045 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2046 	AUDIT_ARG(fd, uap->s);
2047 
2048 	if (uap->fromlenaddr) {
2049 		error = copyin(uap->fromlenaddr,
2050 		    (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2051 		if (error) {
2052 			return error;
2053 		}
2054 	} else {
2055 		msg.msg_namelen = 0;
2056 	}
2057 	msg.msg_name = uap->from;
2058 	auio = uio_create(1, 0,
2059 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2060 	    UIO_READ);
2061 	if (auio == NULL) {
2062 		return ENOMEM;
2063 	}
2064 
2065 	uio_addiov(auio, uap->buf, uap->len);
2066 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
2067 	msg.msg_iov = 0;
2068 	msg.msg_iovlen = 0;
2069 	msg.msg_control = 0;
2070 	msg.msg_controllen = 0;
2071 	msg.msg_flags = uap->flags;
2072 	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2073 	if (auio != NULL) {
2074 		uio_free(auio);
2075 	}
2076 
2077 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2078 
2079 	return error;
2080 }
2081 
/*
 * recvmsg
 *
 * Cancellable entry point: runs the pthread cancellation test and then
 * forwards its arguments unchanged to recvmsg_nocancel().
 *
 * Returns:	0			Success
 *		EMSGSIZE
 *		ENOMEM
 *	copyin:EFAULT
 *	copyout:EFAULT
 *	recvit:???
 *
 * Notes:	For full documentation of the return codes from recvit, see
 *		the block header for the recvit function.
 */
int
recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
{
	/* The argument block is reinterpreted as the _nocancel variant's. */
	struct recvmsg_nocancel_args *nocancel_uap =
	    (struct recvmsg_nocancel_args *)uap;

	__pthread_testcancel(1);

	return recvmsg_nocancel(p, nocancel_uap, retval);
}
2100 
2101 int
recvmsg_nocancel(struct proc * p,struct recvmsg_nocancel_args * uap,int32_t * retval)2102 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2103     int32_t *retval)
2104 {
2105 	struct user32_msghdr msg32;
2106 	struct user64_msghdr msg64;
2107 	struct user_msghdr user_msg;
2108 	caddr_t msghdrp;
2109 	int     size_of_msghdr;
2110 	user_addr_t uiov;
2111 	int error;
2112 	uio_t auio = NULL;
2113 	struct user_iovec *iovp;
2114 
2115 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2116 
2117 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2118 	AUDIT_ARG(fd, uap->s);
2119 	if (is_p_64bit_process) {
2120 		msghdrp = (caddr_t)&msg64;
2121 		size_of_msghdr = sizeof(msg64);
2122 	} else {
2123 		msghdrp = (caddr_t)&msg32;
2124 		size_of_msghdr = sizeof(msg32);
2125 	}
2126 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
2127 	if (error) {
2128 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2129 		return error;
2130 	}
2131 
2132 	/* only need to copy if user process is not 64-bit */
2133 	if (is_p_64bit_process) {
2134 		user_msg.msg_flags = msg64.msg_flags;
2135 		user_msg.msg_controllen = msg64.msg_controllen;
2136 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
2137 		user_msg.msg_iovlen = msg64.msg_iovlen;
2138 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2139 		user_msg.msg_namelen = msg64.msg_namelen;
2140 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
2141 	} else {
2142 		user_msg.msg_flags = msg32.msg_flags;
2143 		user_msg.msg_controllen = msg32.msg_controllen;
2144 		user_msg.msg_control = msg32.msg_control;
2145 		user_msg.msg_iovlen = msg32.msg_iovlen;
2146 		user_msg.msg_iov = msg32.msg_iov;
2147 		user_msg.msg_namelen = msg32.msg_namelen;
2148 		user_msg.msg_name = msg32.msg_name;
2149 	}
2150 
2151 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2152 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2153 		    0, 0, 0, 0);
2154 		return EMSGSIZE;
2155 	}
2156 
2157 	user_msg.msg_flags = uap->flags;
2158 
2159 	/* allocate a uio large enough to hold the number of iovecs passed */
2160 	auio = uio_create(user_msg.msg_iovlen, 0,
2161 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2162 	    UIO_READ);
2163 	if (auio == NULL) {
2164 		error = ENOMEM;
2165 		goto done;
2166 	}
2167 
2168 	/*
2169 	 * get location of iovecs within the uio.  then copyin the iovecs from
2170 	 * user space.
2171 	 */
2172 	iovp = uio_iovsaddr(auio);
2173 	if (iovp == NULL) {
2174 		error = ENOMEM;
2175 		goto done;
2176 	}
2177 	uiov = user_msg.msg_iov;
2178 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2179 	error = copyin_user_iovec_array(uiov,
2180 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2181 	    user_msg.msg_iovlen, iovp);
2182 	if (error) {
2183 		goto done;
2184 	}
2185 
2186 	/* finish setup of uio_t */
2187 	error = uio_calculateresid(auio);
2188 	if (error) {
2189 		goto done;
2190 	}
2191 
2192 	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2193 	if (!error) {
2194 		user_msg.msg_iov = uiov;
2195 		if (is_p_64bit_process) {
2196 			msg64.msg_flags = user_msg.msg_flags;
2197 			msg64.msg_controllen = user_msg.msg_controllen;
2198 			msg64.msg_control = user_msg.msg_control;
2199 			msg64.msg_iovlen = user_msg.msg_iovlen;
2200 			msg64.msg_iov = user_msg.msg_iov;
2201 			msg64.msg_namelen = user_msg.msg_namelen;
2202 			msg64.msg_name = user_msg.msg_name;
2203 		} else {
2204 			msg32.msg_flags = user_msg.msg_flags;
2205 			msg32.msg_controllen = user_msg.msg_controllen;
2206 			msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2207 			msg32.msg_iovlen = user_msg.msg_iovlen;
2208 			msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2209 			msg32.msg_namelen = user_msg.msg_namelen;
2210 			msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2211 		}
2212 		error = copyout(msghdrp, uap->msg, size_of_msghdr);
2213 	}
2214 done:
2215 	if (auio != NULL) {
2216 		uio_free(auio);
2217 	}
2218 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2219 	return error;
2220 }
2221 
2222 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2223 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2224 {
2225 	int error = EOPNOTSUPP;
2226 	struct user_msghdr_x *user_msg_x = NULL;
2227 	struct recv_msg_elem *recv_msg_array = NULL;
2228 	struct socket *so;
2229 	user_ssize_t len_before = 0, len_after;
2230 	int need_drop = 0;
2231 	size_t size_of_msghdr;
2232 	void *umsgp = NULL;
2233 	u_int i;
2234 	u_int uiocnt;
2235 
2236 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2237 
2238 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2239 
2240 	size_of_msghdr = is_p_64bit_process ?
2241 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2242 
2243 	error = file_socket(uap->s, &so);
2244 	if (error) {
2245 		goto out;
2246 	}
2247 	need_drop = 1;
2248 	if (so == NULL) {
2249 		error = EBADF;
2250 		goto out;
2251 	}
2252 	/*
2253 	 * Support only a subset of message flags
2254 	 */
2255 	if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |  MSG_NBIO)) {
2256 		return EOPNOTSUPP;
2257 	}
2258 	/*
2259 	 * Input parameter range check
2260 	 */
2261 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2262 		error = EINVAL;
2263 		goto out;
2264 	}
2265 	if (uap->cnt > somaxrecvmsgx) {
2266 		uap->cnt = somaxrecvmsgx;
2267 	}
2268 
2269 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
2270 	    Z_WAITOK | Z_ZERO);
2271 	if (user_msg_x == NULL) {
2272 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
2273 		error = ENOMEM;
2274 		goto out;
2275 	}
2276 	recv_msg_array = alloc_recv_msg_array(uap->cnt);
2277 	if (recv_msg_array == NULL) {
2278 		DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2279 		error = ENOMEM;
2280 		goto out;
2281 	}
2282 
2283 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2284 	if (umsgp == NULL) {
2285 		DBG_PRINTF("%s umsgp alloc failed\n", __func__);
2286 		error = ENOMEM;
2287 		goto out;
2288 	}
2289 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2290 	if (error) {
2291 		DBG_PRINTF("%s copyin() failed\n", __func__);
2292 		goto out;
2293 	}
2294 	error = internalize_recv_msghdr_array(umsgp,
2295 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2296 	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2297 	if (error) {
2298 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2299 		goto out;
2300 	}
2301 	/*
2302 	 * Make sure the size of each message iovec and
2303 	 * the aggregate size of all the iovec is valid
2304 	 */
2305 	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2306 		error = EINVAL;
2307 		goto out;
2308 	}
2309 	/*
2310 	 * Sanity check on passed arguments
2311 	 */
2312 	for (i = 0; i < uap->cnt; i++) {
2313 		struct user_msghdr_x *mp = user_msg_x + i;
2314 
2315 		if (mp->msg_flags != 0) {
2316 			error = EINVAL;
2317 			goto out;
2318 		}
2319 	}
2320 #if CONFIG_MACF_SOCKET_SUBSET
2321 	/*
2322 	 * We check the state without holding the socket lock;
2323 	 * if a race condition occurs, it would simply result
2324 	 * in an extra call to the MAC check function.
2325 	 */
2326 	if (!(so->so_state & SS_DEFUNCT) &&
2327 	    !(so->so_state & SS_ISCONNECTED) &&
2328 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2329 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2330 		goto out;
2331 	}
2332 #endif /* MAC_SOCKET_SUBSET */
2333 
2334 	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2335 
2336 	if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2337 	    pru_soreceive_list_notsupp &&
2338 	    somaxrecvmsgx == 0) {
2339 		error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2340 		    recv_msg_array, uap->cnt, &uap->flags);
2341 	} else {
2342 		int flags = uap->flags;
2343 
2344 		for (i = 0; i < uap->cnt; i++) {
2345 			struct recv_msg_elem *recv_msg_elem;
2346 			uio_t auio;
2347 			struct sockaddr **psa;
2348 			struct mbuf **controlp;
2349 
2350 			recv_msg_elem = recv_msg_array + i;
2351 			auio = recv_msg_elem->uio;
2352 
2353 			/*
2354 			 * Do not block if we got at least one packet
2355 			 */
2356 			if (i > 0) {
2357 				flags |= MSG_DONTWAIT;
2358 			}
2359 
2360 			psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2361 			    &recv_msg_elem->psa : NULL;
2362 			controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2363 			    &recv_msg_elem->controlp : NULL;
2364 
2365 			error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2366 			    auio, (struct mbuf **)NULL, controlp, &flags);
2367 			if (error) {
2368 				break;
2369 			}
2370 			/*
2371 			 * We have some data
2372 			 */
2373 			recv_msg_elem->which |= SOCK_MSG_DATA;
2374 			/*
2375 			 * Set the messages flags for this packet
2376 			 */
2377 			flags &= ~MSG_DONTWAIT;
2378 			recv_msg_elem->flags = flags;
2379 			/*
2380 			 * Stop on partial copy
2381 			 */
2382 			if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2383 				break;
2384 			}
2385 		}
2386 	}
2387 
2388 	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2389 
2390 	if (error) {
2391 		if (len_after != len_before && (error == ERESTART ||
2392 		    error == EINTR || error == EWOULDBLOCK)) {
2393 			error = 0;
2394 		} else {
2395 			goto out;
2396 		}
2397 	}
2398 
2399 	uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2400 	    uap->cnt, user_msg_x, recv_msg_array, &error);
2401 	if (error != 0) {
2402 		goto out;
2403 	}
2404 
2405 	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2406 	if (error) {
2407 		DBG_PRINTF("%s copyout() failed\n", __func__);
2408 		goto out;
2409 	}
2410 	*retval = (int)(uiocnt);
2411 
2412 out:
2413 	if (need_drop) {
2414 		file_drop(uap->s);
2415 	}
2416 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
2417 	free_recv_msg_array(recv_msg_array, uap->cnt);
2418 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
2419 
2420 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2421 
2422 	return error;
2423 }
2424 
2425 /*
2426  * Returns:	0			Success
2427  *		EBADF
2428  *	file_socket:ENOTSOCK
2429  *	file_socket:EBADF
2430  *	soshutdown:EINVAL
2431  *	soshutdown:ENOTCONN
2432  *	soshutdown:EADDRNOTAVAIL[TCP]
2433  *	soshutdown:ENOBUFS[TCP]
2434  *	soshutdown:EMSGSIZE[TCP]
2435  *	soshutdown:EHOSTUNREACH[TCP]
2436  *	soshutdown:ENETUNREACH[TCP]
2437  *	soshutdown:ENETDOWN[TCP]
2438  *	soshutdown:ENOMEM[TCP]
2439  *	soshutdown:EACCES[TCP]
2440  *	soshutdown:EMSGSIZE[TCP]
2441  *	soshutdown:ENOBUFS[TCP]
2442  *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
2443  *	soshutdown:???			[other protocol families]
2444  */
2445 /* ARGSUSED */
2446 int
shutdown(__unused struct proc * p,struct shutdown_args * uap,__unused int32_t * retval)2447 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2448     __unused int32_t *retval)
2449 {
2450 	struct socket *so;
2451 	int error;
2452 
2453 	AUDIT_ARG(fd, uap->s);
2454 	error = file_socket(uap->s, &so);
2455 	if (error) {
2456 		return error;
2457 	}
2458 	if (so == NULL) {
2459 		error = EBADF;
2460 		goto out;
2461 	}
2462 	error =  soshutdown((struct socket *)so, uap->how);
2463 out:
2464 	file_drop(uap->s);
2465 	return error;
2466 }
2467 
2468 /*
2469  * Returns:	0			Success
2470  *		EFAULT
2471  *		EINVAL
2472  *		EACCES			Mandatory Access Control failure
2473  *	file_socket:ENOTSOCK
2474  *	file_socket:EBADF
2475  *	sosetopt:EINVAL
2476  *	sosetopt:ENOPROTOOPT
2477  *	sosetopt:ENOBUFS
2478  *	sosetopt:EDOM
2479  *	sosetopt:EFAULT
2480  *	sosetopt:EOPNOTSUPP[AF_UNIX]
2481  *	sosetopt:???
2482  */
2483 /* ARGSUSED */
2484 int
setsockopt(struct proc * p,struct setsockopt_args * uap,__unused int32_t * retval)2485 setsockopt(struct proc *p, struct setsockopt_args *uap,
2486     __unused int32_t *retval)
2487 {
2488 	struct socket *so;
2489 	struct sockopt sopt;
2490 	int error;
2491 
2492 	AUDIT_ARG(fd, uap->s);
2493 	if (uap->val == 0 && uap->valsize != 0) {
2494 		return EFAULT;
2495 	}
2496 	/* No bounds checking on size (it's unsigned) */
2497 
2498 	error = file_socket(uap->s, &so);
2499 	if (error) {
2500 		return error;
2501 	}
2502 
2503 	sopt.sopt_dir = SOPT_SET;
2504 	sopt.sopt_level = uap->level;
2505 	sopt.sopt_name = uap->name;
2506 	sopt.sopt_val = uap->val;
2507 	sopt.sopt_valsize = uap->valsize;
2508 	sopt.sopt_p = p;
2509 
2510 	if (so == NULL) {
2511 		error = EINVAL;
2512 		goto out;
2513 	}
2514 #if CONFIG_MACF_SOCKET_SUBSET
2515 	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2516 	    &sopt)) != 0) {
2517 		goto out;
2518 	}
2519 #endif /* MAC_SOCKET_SUBSET */
2520 	error = sosetoptlock(so, &sopt, 1);     /* will lock socket */
2521 out:
2522 	file_drop(uap->s);
2523 	return error;
2524 }
2525 
2526 
2527 
2528 /*
2529  * Returns:	0			Success
2530  *		EINVAL
2531  *		EBADF
2532  *		EACCES			Mandatory Access Control failure
2533  *	copyin:EFAULT
2534  *	copyout:EFAULT
2535  *	file_socket:ENOTSOCK
2536  *	file_socket:EBADF
2537  *	sogetopt:???
2538  */
2539 int
getsockopt(struct proc * p,struct getsockopt_args * uap,__unused int32_t * retval)2540 getsockopt(struct proc *p, struct getsockopt_args  *uap,
2541     __unused int32_t *retval)
2542 {
2543 	int             error;
2544 	socklen_t       valsize;
2545 	struct sockopt  sopt;
2546 	struct socket *so;
2547 
2548 	error = file_socket(uap->s, &so);
2549 	if (error) {
2550 		return error;
2551 	}
2552 	if (uap->val) {
2553 		error = copyin(uap->avalsize, (caddr_t)&valsize,
2554 		    sizeof(valsize));
2555 		if (error) {
2556 			goto out;
2557 		}
2558 		/* No bounds checking on size (it's unsigned) */
2559 	} else {
2560 		valsize = 0;
2561 	}
2562 	sopt.sopt_dir = SOPT_GET;
2563 	sopt.sopt_level = uap->level;
2564 	sopt.sopt_name = uap->name;
2565 	sopt.sopt_val = uap->val;
2566 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2567 	sopt.sopt_p = p;
2568 
2569 	if (so == NULL) {
2570 		error = EBADF;
2571 		goto out;
2572 	}
2573 #if CONFIG_MACF_SOCKET_SUBSET
2574 	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2575 	    &sopt)) != 0) {
2576 		goto out;
2577 	}
2578 #endif /* MAC_SOCKET_SUBSET */
2579 	error = sogetoptlock((struct socket *)so, &sopt, 1);    /* will lock */
2580 	if (error == 0) {
2581 		valsize = (socklen_t)sopt.sopt_valsize;
2582 		error = copyout((caddr_t)&valsize, uap->avalsize,
2583 		    sizeof(valsize));
2584 	}
2585 out:
2586 	file_drop(uap->s);
2587 	return error;
2588 }
2589 
2590 
2591 /*
2592  * Get socket name.
2593  *
2594  * Returns:	0			Success
2595  *		EBADF
2596  *	file_socket:ENOTSOCK
2597  *	file_socket:EBADF
2598  *	copyin:EFAULT
2599  *	copyout:EFAULT
2600  *	<pru_sockaddr>:ENOBUFS[TCP]
2601  *	<pru_sockaddr>:ECONNRESET[TCP]
2602  *	<pru_sockaddr>:EINVAL[AF_UNIX]
2603  *	<sf_getsockname>:???
2604  */
2605 /* ARGSUSED */
2606 int
getsockname(__unused struct proc * p,struct getsockname_args * uap,__unused int32_t * retval)2607 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2608     __unused int32_t *retval)
2609 {
2610 	struct socket *so;
2611 	struct sockaddr *sa;
2612 	socklen_t len;
2613 	socklen_t sa_len;
2614 	int error;
2615 
2616 	error = file_socket(uap->fdes, &so);
2617 	if (error) {
2618 		return error;
2619 	}
2620 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2621 	if (error) {
2622 		goto out;
2623 	}
2624 	if (so == NULL) {
2625 		error = EBADF;
2626 		goto out;
2627 	}
2628 	sa = 0;
2629 	socket_lock(so, 1);
2630 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2631 	if (error == 0) {
2632 		error = sflt_getsockname(so, &sa);
2633 		if (error == EJUSTRETURN) {
2634 			error = 0;
2635 		}
2636 	}
2637 	socket_unlock(so, 1);
2638 	if (error) {
2639 		goto bad;
2640 	}
2641 	if (sa == 0) {
2642 		len = 0;
2643 		goto gotnothing;
2644 	}
2645 
2646 	sa_len = sa->sa_len;
2647 	len = MIN(len, sa_len);
2648 	error = copyout((caddr_t)sa, uap->asa, len);
2649 	if (error) {
2650 		goto bad;
2651 	}
2652 	/* return the actual, untruncated address length */
2653 	len = sa_len;
2654 gotnothing:
2655 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2656 bad:
2657 	free_sockaddr(sa);
2658 out:
2659 	file_drop(uap->fdes);
2660 	return error;
2661 }
2662 
2663 /*
2664  * Get name of peer for connected socket.
2665  *
2666  * Returns:	0			Success
2667  *		EBADF
2668  *		EINVAL
2669  *		ENOTCONN
2670  *	file_socket:ENOTSOCK
2671  *	file_socket:EBADF
2672  *	copyin:EFAULT
2673  *	copyout:EFAULT
2674  *	<pru_peeraddr>:???
2675  *	<sf_getpeername>:???
2676  */
2677 /* ARGSUSED */
2678 int
getpeername(__unused struct proc * p,struct getpeername_args * uap,__unused int32_t * retval)2679 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2680     __unused int32_t *retval)
2681 {
2682 	struct socket *so;
2683 	struct sockaddr *sa;
2684 	socklen_t len;
2685 	socklen_t sa_len;
2686 	int error;
2687 
2688 	error = file_socket(uap->fdes, &so);
2689 	if (error) {
2690 		return error;
2691 	}
2692 	if (so == NULL) {
2693 		error = EBADF;
2694 		goto out;
2695 	}
2696 
2697 	socket_lock(so, 1);
2698 
2699 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2700 	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2701 		/* the socket has been shutdown, no more getpeername's */
2702 		socket_unlock(so, 1);
2703 		error = EINVAL;
2704 		goto out;
2705 	}
2706 
2707 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2708 		socket_unlock(so, 1);
2709 		error = ENOTCONN;
2710 		goto out;
2711 	}
2712 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2713 	if (error) {
2714 		socket_unlock(so, 1);
2715 		goto out;
2716 	}
2717 	sa = 0;
2718 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2719 	if (error == 0) {
2720 		error = sflt_getpeername(so, &sa);
2721 		if (error == EJUSTRETURN) {
2722 			error = 0;
2723 		}
2724 	}
2725 	socket_unlock(so, 1);
2726 	if (error) {
2727 		goto bad;
2728 	}
2729 	if (sa == 0) {
2730 		len = 0;
2731 		goto gotnothing;
2732 	}
2733 	sa_len = sa->sa_len;
2734 	len = MIN(len, sa_len);
2735 	error = copyout(sa, uap->asa, len);
2736 	if (error) {
2737 		goto bad;
2738 	}
2739 	/* return the actual, untruncated address length */
2740 	len = sa_len;
2741 gotnothing:
2742 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2743 bad:
2744 	free_sockaddr(sa);
2745 out:
2746 	file_drop(uap->fdes);
2747 	return error;
2748 }
2749 
2750 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)2751 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
2752 {
2753 	struct sockaddr *sa;
2754 	struct mbuf *m;
2755 	int error;
2756 	socklen_t alloc_buflen = buflen;
2757 
2758 	if (buflen > INT_MAX / 2) {
2759 		return EINVAL;
2760 	}
2761 	if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
2762 	    buflen < offsetof(struct sockaddr, sa_data[0]))) {
2763 		return EINVAL;
2764 	}
2765 	if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
2766 		return EINVAL;
2767 	}
2768 
2769 #ifdef __LP64__
2770 	/*
2771 	 * The fd's in the buffer must expand to be pointers, thus we need twice
2772 	 * as much space
2773 	 */
2774 	if (type == MT_CONTROL) {
2775 		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2776 		    sizeof(struct cmsghdr);
2777 	}
2778 #endif
2779 	if (alloc_buflen > MLEN) {
2780 		if (type == MT_SONAME && alloc_buflen <= 112) {
2781 			alloc_buflen = MLEN;    /* unix domain compat. hack */
2782 		} else if (alloc_buflen > MCLBYTES) {
2783 			return EINVAL;
2784 		}
2785 	}
2786 	m = m_get(M_WAIT, type);
2787 	if (m == NULL) {
2788 		return ENOBUFS;
2789 	}
2790 	if (alloc_buflen > MLEN) {
2791 		MCLGET(m, M_WAIT);
2792 		if ((m->m_flags & M_EXT) == 0) {
2793 			m_free(m);
2794 			return ENOBUFS;
2795 		}
2796 	}
2797 	/*
2798 	 * K64: We still copyin the original buflen because it gets expanded
2799 	 * later and we lie about the size of the mbuf because it only affects
2800 	 * unp_* functions
2801 	 */
2802 	m->m_len = buflen;
2803 	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2804 	if (error) {
2805 		(void) m_free(m);
2806 	} else {
2807 		*mp = m;
2808 		if (type == MT_SONAME) {
2809 			sa = mtod(m, struct sockaddr *);
2810 			VERIFY(buflen <= SOCK_MAXADDRLEN);
2811 			sa->sa_len = (__uint8_t)buflen;
2812 		}
2813 	}
2814 	return error;
2815 }
2816 
2817 /*
2818  * Given a user_addr_t of length len, allocate and fill out a *sa.
2819  *
2820  * Returns:	0			Success
2821  *		ENAMETOOLONG		Filename too long
2822  *		EINVAL			Invalid argument
2823  *		ENOMEM			Not enough space
2824  *		copyin:EFAULT		Bad address
2825  */
2826 static int
getsockaddr(struct socket * so,struct sockaddr ** namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2827 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2828     size_t len, boolean_t translate_unspec)
2829 {
2830 	struct sockaddr *sa;
2831 	int error;
2832 
2833 	if (len > SOCK_MAXADDRLEN) {
2834 		return ENAMETOOLONG;
2835 	}
2836 
2837 	if (len < offsetof(struct sockaddr, sa_data[0])) {
2838 		return EINVAL;
2839 	}
2840 
2841 	sa = (struct sockaddr *)alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
2842 
2843 	error = copyin(uaddr, (caddr_t)sa, len);
2844 	if (error) {
2845 		free_sockaddr(sa);
2846 	} else {
2847 		/*
2848 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2849 		 * legacy applications that use AF_UNSPEC (0).  On all other
2850 		 * sockets we leave it unchanged and let the lower layer
2851 		 * handle it.
2852 		 */
2853 		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2854 		    SOCK_CHECK_DOM(so, PF_INET) &&
2855 		    len == sizeof(struct sockaddr_in)) {
2856 			sa->sa_family = AF_INET;
2857 		}
2858 		VERIFY(len <= SOCK_MAXADDRLEN);
2859 		sa->sa_len = (__uint8_t)len;
2860 		*namp = sa;
2861 	}
2862 	return error;
2863 }
2864 
2865 static int
getsockaddr_s(struct socket * so,struct sockaddr_storage * ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2866 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2867     user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2868 {
2869 	int error;
2870 
2871 	if (ss == NULL || uaddr == USER_ADDR_NULL ||
2872 	    len < offsetof(struct sockaddr, sa_data[0])) {
2873 		return EINVAL;
2874 	}
2875 
2876 	/*
2877 	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2878 	 * so the check here is inclusive.
2879 	 */
2880 	if (len > sizeof(*ss)) {
2881 		return ENAMETOOLONG;
2882 	}
2883 
2884 	bzero(ss, sizeof(*ss));
2885 	error = copyin(uaddr, (caddr_t)ss, len);
2886 	if (error == 0) {
2887 		/*
2888 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2889 		 * legacy applications that use AF_UNSPEC (0).  On all other
2890 		 * sockets we leave it unchanged and let the lower layer
2891 		 * handle it.
2892 		 */
2893 		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2894 		    SOCK_CHECK_DOM(so, PF_INET) &&
2895 		    len == sizeof(struct sockaddr_in)) {
2896 			ss->ss_family = AF_INET;
2897 		}
2898 
2899 		ss->ss_len = (__uint8_t)len;
2900 	}
2901 	return error;
2902 }
2903 
2904 int
internalize_user_msghdr_array(const void * src,int spacetype,int direction,u_int count,struct user_msghdr_x * dst,struct uio ** uiop)2905 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2906     u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2907 {
2908 	int error = 0;
2909 	u_int i;
2910 	u_int namecnt = 0;
2911 	u_int ctlcnt = 0;
2912 
2913 	for (i = 0; i < count; i++) {
2914 		uio_t auio;
2915 		struct user_iovec *iovp;
2916 		struct user_msghdr_x *user_msg = dst + i;
2917 
2918 		if (spacetype == UIO_USERSPACE64) {
2919 			const struct user64_msghdr_x *msghdr64;
2920 
2921 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2922 
2923 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2924 			user_msg->msg_namelen = msghdr64->msg_namelen;
2925 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2926 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
2927 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2928 			user_msg->msg_controllen = msghdr64->msg_controllen;
2929 			user_msg->msg_flags = msghdr64->msg_flags;
2930 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2931 		} else {
2932 			const struct user32_msghdr_x *msghdr32;
2933 
2934 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2935 
2936 			user_msg->msg_name = msghdr32->msg_name;
2937 			user_msg->msg_namelen = msghdr32->msg_namelen;
2938 			user_msg->msg_iov = msghdr32->msg_iov;
2939 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
2940 			user_msg->msg_control = msghdr32->msg_control;
2941 			user_msg->msg_controllen = msghdr32->msg_controllen;
2942 			user_msg->msg_flags = msghdr32->msg_flags;
2943 			user_msg->msg_datalen = msghdr32->msg_datalen;
2944 		}
2945 
2946 		if (user_msg->msg_iovlen <= 0 ||
2947 		    user_msg->msg_iovlen > UIO_MAXIOV) {
2948 			error = EMSGSIZE;
2949 			goto done;
2950 		}
2951 		auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2952 		    direction);
2953 		if (auio == NULL) {
2954 			error = ENOMEM;
2955 			goto done;
2956 		}
2957 		uiop[i] = auio;
2958 
2959 		iovp = uio_iovsaddr(auio);
2960 		if (iovp == NULL) {
2961 			error = ENOMEM;
2962 			goto done;
2963 		}
2964 		error = copyin_user_iovec_array(user_msg->msg_iov,
2965 		    spacetype, user_msg->msg_iovlen, iovp);
2966 		if (error) {
2967 			goto done;
2968 		}
2969 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2970 
2971 		error = uio_calculateresid(auio);
2972 		if (error) {
2973 			goto done;
2974 		}
2975 		user_msg->msg_datalen = uio_resid(auio);
2976 
2977 		if (user_msg->msg_name && user_msg->msg_namelen) {
2978 			namecnt++;
2979 		}
2980 		if (user_msg->msg_control && user_msg->msg_controllen) {
2981 			ctlcnt++;
2982 		}
2983 	}
2984 done:
2985 
2986 	return error;
2987 }
2988 
2989 int
internalize_recv_msghdr_array(const void * src,int spacetype,int direction,u_int count,struct user_msghdr_x * dst,struct recv_msg_elem * recv_msg_array)2990 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2991     u_int count, struct user_msghdr_x *dst,
2992     struct recv_msg_elem *recv_msg_array)
2993 {
2994 	int error = 0;
2995 	u_int i;
2996 
2997 	for (i = 0; i < count; i++) {
2998 		struct user_iovec *iovp;
2999 		struct user_msghdr_x *user_msg = dst + i;
3000 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3001 
3002 		if (spacetype == UIO_USERSPACE64) {
3003 			const struct user64_msghdr_x *msghdr64;
3004 
3005 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3006 
3007 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3008 			user_msg->msg_namelen = msghdr64->msg_namelen;
3009 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3010 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3011 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3012 			user_msg->msg_controllen = msghdr64->msg_controllen;
3013 			user_msg->msg_flags = msghdr64->msg_flags;
3014 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3015 		} else {
3016 			const struct user32_msghdr_x *msghdr32;
3017 
3018 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3019 
3020 			user_msg->msg_name = msghdr32->msg_name;
3021 			user_msg->msg_namelen = msghdr32->msg_namelen;
3022 			user_msg->msg_iov = msghdr32->msg_iov;
3023 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3024 			user_msg->msg_control = msghdr32->msg_control;
3025 			user_msg->msg_controllen = msghdr32->msg_controllen;
3026 			user_msg->msg_flags = msghdr32->msg_flags;
3027 			user_msg->msg_datalen = msghdr32->msg_datalen;
3028 		}
3029 
3030 		if (user_msg->msg_iovlen <= 0 ||
3031 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3032 			error = EMSGSIZE;
3033 			goto done;
3034 		}
3035 		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3036 		    spacetype, direction);
3037 		if (recv_msg_elem->uio == NULL) {
3038 			error = ENOMEM;
3039 			goto done;
3040 		}
3041 
3042 		iovp = uio_iovsaddr(recv_msg_elem->uio);
3043 		if (iovp == NULL) {
3044 			error = ENOMEM;
3045 			goto done;
3046 		}
3047 		error = copyin_user_iovec_array(user_msg->msg_iov,
3048 		    spacetype, user_msg->msg_iovlen, iovp);
3049 		if (error) {
3050 			goto done;
3051 		}
3052 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3053 
3054 		error = uio_calculateresid(recv_msg_elem->uio);
3055 		if (error) {
3056 			goto done;
3057 		}
3058 		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3059 
3060 		if (user_msg->msg_name && user_msg->msg_namelen) {
3061 			recv_msg_elem->which |= SOCK_MSG_SA;
3062 		}
3063 		if (user_msg->msg_control && user_msg->msg_controllen) {
3064 			recv_msg_elem->which |= SOCK_MSG_CONTROL;
3065 		}
3066 	}
3067 done:
3068 
3069 	return error;
3070 }
3071 
3072 u_int
externalize_user_msghdr_array(void * dst,int spacetype,int direction,u_int count,const struct user_msghdr_x * src,struct uio ** uiop)3073 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3074     u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3075 {
3076 #pragma unused(direction)
3077 	u_int i;
3078 	int seenlast = 0;
3079 	u_int retcnt = 0;
3080 
3081 	for (i = 0; i < count; i++) {
3082 		const struct user_msghdr_x *user_msg = src + i;
3083 		uio_t auio = uiop[i];
3084 		user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3085 
3086 		if (user_msg->msg_datalen != 0 && len == 0) {
3087 			seenlast = 1;
3088 		}
3089 
3090 		if (seenlast == 0) {
3091 			retcnt++;
3092 		}
3093 
3094 		if (spacetype == UIO_USERSPACE64) {
3095 			struct user64_msghdr_x *msghdr64;
3096 
3097 			msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3098 
3099 			msghdr64->msg_flags = user_msg->msg_flags;
3100 			msghdr64->msg_datalen = len;
3101 		} else {
3102 			struct user32_msghdr_x *msghdr32;
3103 
3104 			msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3105 
3106 			msghdr32->msg_flags = user_msg->msg_flags;
3107 			msghdr32->msg_datalen = (user32_size_t)len;
3108 		}
3109 	}
3110 	return retcnt;
3111 }
3112 
3113 u_int
externalize_recv_msghdr_array(struct proc * p,struct socket * so,void * dst,u_int count,struct user_msghdr_x * src,struct recv_msg_elem * recv_msg_array,int * ret_error)3114 externalize_recv_msghdr_array(struct proc *p, struct socket *so, void *dst,
3115     u_int count, struct user_msghdr_x *src,
3116     struct recv_msg_elem *recv_msg_array, int *ret_error)
3117 {
3118 	u_int i;
3119 	u_int retcnt = 0;
3120 	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3121 
3122 	*ret_error = 0;
3123 
3124 	for (i = 0; i < count; i++) {
3125 		struct user_msghdr_x *user_msg = src + i;
3126 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3127 		user_ssize_t len = 0;
3128 		int error;
3129 
3130 		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3131 
3132 		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3133 			retcnt++;
3134 
3135 
3136 			if (recv_msg_elem->which & SOCK_MSG_SA) {
3137 				error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3138 				    &user_msg->msg_namelen);
3139 				if (error != 0) {
3140 					*ret_error = error;
3141 					return 0;
3142 				}
3143 			}
3144 			if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3145 				error = copyout_control(p, recv_msg_elem->controlp,
3146 				    user_msg->msg_control, &user_msg->msg_controllen,
3147 				    &recv_msg_elem->flags, so);
3148 				if (error != 0) {
3149 					*ret_error = error;
3150 					return 0;
3151 				}
3152 			}
3153 		}
3154 
3155 		if (spacetype == UIO_USERSPACE64) {
3156 			struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3157 
3158 			msghdr64->msg_namelen = user_msg->msg_namelen;
3159 			msghdr64->msg_controllen = user_msg->msg_controllen;
3160 			msghdr64->msg_flags = recv_msg_elem->flags;
3161 			msghdr64->msg_datalen = len;
3162 		} else {
3163 			struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3164 
3165 			msghdr32->msg_namelen = user_msg->msg_namelen;
3166 			msghdr32->msg_controllen = user_msg->msg_controllen;
3167 			msghdr32->msg_flags = recv_msg_elem->flags;
3168 			msghdr32->msg_datalen = (user32_size_t)len;
3169 		}
3170 	}
3171 	return retcnt;
3172 }
3173 
3174 void
free_uio_array(struct uio ** uiop,u_int count)3175 free_uio_array(struct uio **uiop, u_int count)
3176 {
3177 	u_int i;
3178 
3179 	for (i = 0; i < count; i++) {
3180 		if (uiop[i] != NULL) {
3181 			uio_free(uiop[i]);
3182 		}
3183 	}
3184 }
3185 
3186 __private_extern__ user_ssize_t
uio_array_resid(struct uio ** uiop,u_int count)3187 uio_array_resid(struct uio **uiop, u_int count)
3188 {
3189 	user_ssize_t len = 0;
3190 	u_int i;
3191 
3192 	for (i = 0; i < count; i++) {
3193 		struct uio *auio = uiop[i];
3194 
3195 		if (auio != NULL) {
3196 			len += uio_resid(auio);
3197 		}
3198 	}
3199 	return len;
3200 }
3201 
3202 static boolean_t
uio_array_is_valid(struct uio ** uiop,u_int count)3203 uio_array_is_valid(struct uio **uiop, u_int count)
3204 {
3205 	user_ssize_t len = 0;
3206 	u_int i;
3207 
3208 	for (i = 0; i < count; i++) {
3209 		struct uio *auio = uiop[i];
3210 
3211 		if (auio != NULL) {
3212 			user_ssize_t resid = uio_resid(auio);
3213 
3214 			/*
3215 			 * Sanity check on the validity of the iovec:
3216 			 * no point of going over sb_max
3217 			 */
3218 			if (resid < 0 || resid > (user_ssize_t)sb_max) {
3219 				return false;
3220 			}
3221 
3222 			len += resid;
3223 			if (len < 0 || len > (user_ssize_t)sb_max) {
3224 				return false;
3225 			}
3226 		}
3227 	}
3228 	return true;
3229 }
3230 
3231 
3232 struct recv_msg_elem *
alloc_recv_msg_array(u_int count)3233 alloc_recv_msg_array(u_int count)
3234 {
3235 	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3236 }
3237 
3238 void
free_recv_msg_array(struct recv_msg_elem * recv_msg_array,u_int count)3239 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3240 {
3241 	if (recv_msg_array == NULL) {
3242 		return;
3243 	}
3244 	for (uint32_t i = 0; i < count; i++) {
3245 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3246 
3247 		if (recv_msg_elem->uio != NULL) {
3248 			uio_free(recv_msg_elem->uio);
3249 		}
3250 		free_sockaddr(recv_msg_elem->psa);
3251 		if (recv_msg_elem->controlp != NULL) {
3252 			m_freem(recv_msg_elem->controlp);
3253 		}
3254 	}
3255 	kfree_type(struct recv_msg_elem, count, recv_msg_array);
3256 }
3257 
3258 
3259 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * recv_msg_array,u_int count)3260 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3261 {
3262 	user_ssize_t len = 0;
3263 	u_int i;
3264 
3265 	for (i = 0; i < count; i++) {
3266 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3267 
3268 		if (recv_msg_elem->uio != NULL) {
3269 			len += uio_resid(recv_msg_elem->uio);
3270 		}
3271 	}
3272 	return len;
3273 }
3274 
3275 int
recv_msg_array_is_valid(struct recv_msg_elem * recv_msg_array,u_int count)3276 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3277 {
3278 	user_ssize_t len = 0;
3279 	u_int i;
3280 
3281 	for (i = 0; i < count; i++) {
3282 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3283 
3284 		if (recv_msg_elem->uio != NULL) {
3285 			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3286 
3287 			/*
3288 			 * Sanity check on the validity of the iovec:
3289 			 * no point of going over sb_max
3290 			 */
3291 			if (resid < 0 || (u_int32_t)resid > sb_max) {
3292 				return 0;
3293 			}
3294 
3295 			len += resid;
3296 			if (len < 0 || (u_int32_t)len > sb_max) {
3297 				return 0;
3298 			}
3299 		}
3300 	}
3301 	return 1;
3302 }
3303 
3304 #if SENDFILE
3305 
/* Number of iovec slots sendfile() sets aside for each read pass */
#define SFUIOBUFS 64

/*
 * Number of clusters needed to hold n bytes, for the cluster size selected
 * by M16KCLSHIFT or MBIGCLSHIFT.  Computed as ((n - 1) >> shift) + 1, so
 * n is expected to be non-zero.
 */
#define HOWMANY_16K(n)  (1 + (((unsigned int)(n) - 1) >> M16KCLSHIFT))
#define HOWMANY_4K(n)   (1 + (((unsigned int)(n) - 1) >> MBIGCLSHIFT))

/* Most bytes moved in one pass: SFUIOBUFS pages */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* The same ceiling expressed as a count of mbuf clusters */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3318 
3319 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,struct mbuf ** m,boolean_t jumbocl)3320 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3321     struct mbuf **m, boolean_t jumbocl)
3322 {
3323 	unsigned int needed;
3324 
3325 	if (pktlen == 0) {
3326 		panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3327 	}
3328 
3329 	/*
3330 	 * Try to allocate for the whole thing.  Since we want full control
3331 	 * over the buffer size and be able to accept partial result, we can't
3332 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
3333 	 */
3334 	*m = NULL;
3335 	if (pktlen > MBIGCLBYTES && jumbocl) {
3336 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3337 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3338 	}
3339 	if (*m == NULL) {
3340 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3341 		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3342 	}
3343 
3344 	/*
3345 	 * Our previous attempt(s) at allocation had failed; the system
3346 	 * may be short on mbufs, and we want to block until they are
3347 	 * available.  This time, ask just for 1 mbuf and don't return
3348 	 * until we get it.
3349 	 */
3350 	if (*m == NULL) {
3351 		needed = 1;
3352 		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3353 	}
3354 	if (*m == NULL) {
3355 		panic("%s: blocking allocation returned NULL", __func__);
3356 	}
3357 
3358 	*maxchunks = needed;
3359 }
3360 
3361 /*
3362  * sendfile(2).
3363  * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3364  *	 struct sf_hdtr *hdtr, int flags)
3365  *
3366  * Send a file specified by 'fd' and starting at 'offset' to a socket
3367  * specified by 's'. Send only '*nbytes' of the file or until EOF if
3368  * *nbytes == 0. Optionally add a header and/or trailer to the socket
3369  * output. If specified, write the total number of bytes sent into *nbytes.
3370  */
3371 int
sendfile(struct proc * p,struct sendfile_args * uap,__unused int * retval)3372 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3373 {
3374 	struct fileproc *fp;
3375 	struct vnode *vp;
3376 	struct socket *so;
3377 	struct writev_nocancel_args nuap;
3378 	user_ssize_t writev_retval;
3379 	struct user_sf_hdtr user_hdtr;
3380 	struct user32_sf_hdtr user32_hdtr;
3381 	struct user64_sf_hdtr user64_hdtr;
3382 	off_t off, xfsize;
3383 	off_t nbytes = 0, sbytes = 0;
3384 	int error = 0;
3385 	size_t sizeof_hdtr;
3386 	off_t file_size;
3387 	struct vfs_context context = *vfs_context_current();
3388 
3389 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3390 
3391 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3392 	    0, 0, 0, 0);
3393 
3394 	AUDIT_ARG(fd, uap->fd);
3395 	AUDIT_ARG(value32, uap->s);
3396 
3397 	/*
3398 	 * Do argument checking. Must be a regular file in, stream
3399 	 * type and connected socket out, positive offset.
3400 	 */
3401 	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3402 		goto done;
3403 	}
3404 	if ((fp->f_flag & FREAD) == 0) {
3405 		error = EBADF;
3406 		goto done1;
3407 	}
3408 	if (vnode_isreg(vp) == 0) {
3409 		error = ENOTSUP;
3410 		goto done1;
3411 	}
3412 	error = file_socket(uap->s, &so);
3413 	if (error) {
3414 		goto done1;
3415 	}
3416 	if (so == NULL) {
3417 		error = EBADF;
3418 		goto done2;
3419 	}
3420 	if (so->so_type != SOCK_STREAM) {
3421 		error = EINVAL;
3422 		goto done2;
3423 	}
3424 	if ((so->so_state & SS_ISCONNECTED) == 0) {
3425 		error = ENOTCONN;
3426 		goto done2;
3427 	}
3428 	if (uap->offset < 0) {
3429 		error = EINVAL;
3430 		goto done2;
3431 	}
3432 	if (uap->nbytes == USER_ADDR_NULL) {
3433 		error = EINVAL;
3434 		goto done2;
3435 	}
3436 	if (uap->flags != 0) {
3437 		error = EINVAL;
3438 		goto done2;
3439 	}
3440 
3441 	context.vc_ucred = fp->fp_glob->fg_cred;
3442 
3443 #if CONFIG_MACF_SOCKET_SUBSET
3444 	/* JMM - fetch connected sockaddr? */
3445 	error = mac_socket_check_send(context.vc_ucred, so, NULL);
3446 	if (error) {
3447 		goto done2;
3448 	}
3449 #endif
3450 
3451 	/*
3452 	 * Get number of bytes to send
3453 	 * Should it applies to size of header and trailer?
3454 	 */
3455 	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3456 	if (error) {
3457 		goto done2;
3458 	}
3459 
3460 	/*
3461 	 * If specified, get the pointer to the sf_hdtr struct for
3462 	 * any headers/trailers.
3463 	 */
3464 	if (uap->hdtr != USER_ADDR_NULL) {
3465 		caddr_t hdtrp;
3466 
3467 		bzero(&user_hdtr, sizeof(user_hdtr));
3468 		if (is_p_64bit_process) {
3469 			hdtrp = (caddr_t)&user64_hdtr;
3470 			sizeof_hdtr = sizeof(user64_hdtr);
3471 		} else {
3472 			hdtrp = (caddr_t)&user32_hdtr;
3473 			sizeof_hdtr = sizeof(user32_hdtr);
3474 		}
3475 		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3476 		if (error) {
3477 			goto done2;
3478 		}
3479 		if (is_p_64bit_process) {
3480 			user_hdtr.headers = user64_hdtr.headers;
3481 			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3482 			user_hdtr.trailers = user64_hdtr.trailers;
3483 			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3484 		} else {
3485 			user_hdtr.headers = user32_hdtr.headers;
3486 			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3487 			user_hdtr.trailers = user32_hdtr.trailers;
3488 			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3489 		}
3490 
3491 		/*
3492 		 * Send any headers. Wimp out and use writev(2).
3493 		 */
3494 		if (user_hdtr.headers != USER_ADDR_NULL) {
3495 			bzero(&nuap, sizeof(struct writev_args));
3496 			nuap.fd = uap->s;
3497 			nuap.iovp = user_hdtr.headers;
3498 			nuap.iovcnt = user_hdtr.hdr_cnt;
3499 			error = writev_nocancel(p, &nuap, &writev_retval);
3500 			if (error) {
3501 				goto done2;
3502 			}
3503 			sbytes += writev_retval;
3504 		}
3505 	}
3506 
3507 	/*
3508 	 * Get the file size for 2 reasons:
3509 	 *  1. We don't want to allocate more mbufs than necessary
3510 	 *  2. We don't want to read past the end of file
3511 	 */
3512 	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3513 		goto done2;
3514 	}
3515 
3516 	/*
3517 	 * Simply read file data into a chain of mbufs that used with scatter
3518 	 * gather reads. We're not (yet?) setup to use zero copy external
3519 	 * mbufs that point to the file pages.
3520 	 */
3521 	socket_lock(so, 1);
3522 	error = sblock(&so->so_snd, SBL_WAIT);
3523 	if (error) {
3524 		socket_unlock(so, 1);
3525 		goto done2;
3526 	}
3527 	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3528 		mbuf_t  m0 = NULL, m;
3529 		unsigned int    nbufs = SFUIOBUFS, i;
3530 		uio_t   auio;
3531 		uio_stackbuf_t    uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3532 		size_t  uiolen;
3533 		user_ssize_t    rlen;
3534 		off_t   pgoff;
3535 		size_t  pktlen;
3536 		boolean_t jumbocl;
3537 
3538 		/*
3539 		 * Calculate the amount to transfer.
3540 		 * Align to round number of pages.
3541 		 * Not to exceed send socket buffer,
3542 		 * the EOF, or the passed in nbytes.
3543 		 */
3544 		xfsize = sbspace(&so->so_snd);
3545 
3546 		if (xfsize <= 0) {
3547 			if (so->so_state & SS_CANTSENDMORE) {
3548 				error = EPIPE;
3549 				goto done3;
3550 			} else if ((so->so_state & SS_NBIO)) {
3551 				error = EAGAIN;
3552 				goto done3;
3553 			} else {
3554 				xfsize = PAGE_SIZE;
3555 			}
3556 		}
3557 
3558 		if (xfsize > SENDFILE_MAX_BYTES) {
3559 			xfsize = SENDFILE_MAX_BYTES;
3560 		} else if (xfsize > PAGE_SIZE) {
3561 			xfsize = trunc_page(xfsize);
3562 		}
3563 		pgoff = off & PAGE_MASK_64;
3564 		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3565 			xfsize = PAGE_SIZE_64 - pgoff;
3566 		}
3567 		if (nbytes && xfsize > (nbytes - sbytes)) {
3568 			xfsize = nbytes - sbytes;
3569 		}
3570 		if (xfsize <= 0) {
3571 			break;
3572 		}
3573 		if (off + xfsize > file_size) {
3574 			xfsize = file_size - off;
3575 		}
3576 		if (xfsize <= 0) {
3577 			break;
3578 		}
3579 
3580 		/*
3581 		 * Attempt to use larger than system page-size clusters for
3582 		 * large writes only if there is a jumbo cluster pool and
3583 		 * if the socket is marked accordingly.
3584 		 */
3585 		jumbocl = sosendjcl && njcl > 0 &&
3586 		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3587 
3588 		socket_unlock(so, 0);
3589 		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3590 		pktlen = mbuf_pkthdr_maxlen(m0);
3591 		if (pktlen < (size_t)xfsize) {
3592 			xfsize = pktlen;
3593 		}
3594 
3595 		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3596 		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
3597 		if (auio == NULL) {
3598 			printf("sendfile failed. nbufs = %d. %s", nbufs,
3599 			    "File a radar related to rdar://10146739.\n");
3600 			mbuf_freem(m0);
3601 			error = ENXIO;
3602 			socket_lock(so, 0);
3603 			goto done3;
3604 		}
3605 
3606 		for (i = 0, m = m0, uiolen = 0;
3607 		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3608 		    i++, m = mbuf_next(m)) {
3609 			size_t mlen = mbuf_maxlen(m);
3610 
3611 			if (mlen + uiolen > (size_t)xfsize) {
3612 				mlen = xfsize - uiolen;
3613 			}
3614 			mbuf_setlen(m, mlen);
3615 			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3616 			    mlen);
3617 			uiolen += mlen;
3618 		}
3619 
3620 		if (xfsize != uio_resid(auio)) {
3621 			printf("sendfile: xfsize: %lld != uio_resid(auio): "
3622 			    "%lld\n", xfsize, (long long)uio_resid(auio));
3623 		}
3624 
3625 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3626 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3627 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3628 		error = fo_read(fp, auio, FOF_OFFSET, &context);
3629 		socket_lock(so, 0);
3630 		if (error != 0) {
3631 			if (uio_resid(auio) != xfsize && (error == ERESTART ||
3632 			    error == EINTR || error == EWOULDBLOCK)) {
3633 				error = 0;
3634 			} else {
3635 				mbuf_freem(m0);
3636 				goto done3;
3637 			}
3638 		}
3639 		xfsize -= uio_resid(auio);
3640 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3641 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3642 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3643 
3644 		if (xfsize == 0) {
3645 			// printf("sendfile: fo_read 0 bytes, EOF\n");
3646 			break;
3647 		}
3648 		if (xfsize + off > file_size) {
3649 			printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3650 			    "%lld\n", xfsize, off, file_size);
3651 		}
3652 		for (i = 0, m = m0, rlen = 0;
3653 		    i < nbufs && m != NULL && rlen < xfsize;
3654 		    i++, m = mbuf_next(m)) {
3655 			size_t mlen = mbuf_maxlen(m);
3656 
3657 			if (rlen + mlen > (size_t)xfsize) {
3658 				mlen = xfsize - rlen;
3659 			}
3660 			mbuf_setlen(m, mlen);
3661 
3662 			rlen += mlen;
3663 		}
3664 		mbuf_pkthdr_setlen(m0, xfsize);
3665 
3666 retry_space:
3667 		/*
3668 		 * Make sure that the socket is still able to take more data.
3669 		 * CANTSENDMORE being true usually means that the connection
3670 		 * was closed. so_error is true when an error was sensed after
3671 		 * a previous send.
3672 		 * The state is checked after the page mapping and buffer
3673 		 * allocation above since those operations may block and make
3674 		 * any socket checks stale. From this point forward, nothing
3675 		 * blocks before the pru_send (or more accurately, any blocking
3676 		 * results in a loop back to here to re-check).
3677 		 */
3678 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3679 			if (so->so_state & SS_CANTSENDMORE) {
3680 				error = EPIPE;
3681 			} else {
3682 				error = so->so_error;
3683 				so->so_error = 0;
3684 			}
3685 			m_freem(m0);
3686 			goto done3;
3687 		}
3688 		/*
3689 		 * Wait for socket space to become available. We do this just
3690 		 * after checking the connection state above in order to avoid
3691 		 * a race condition with sbwait().
3692 		 */
3693 		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3694 			if (so->so_state & SS_NBIO) {
3695 				m_freem(m0);
3696 				error = EAGAIN;
3697 				goto done3;
3698 			}
3699 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3700 			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3701 			error = sbwait(&so->so_snd);
3702 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3703 			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3704 			/*
3705 			 * An error from sbwait usually indicates that we've
3706 			 * been interrupted by a signal. If we've sent anything
3707 			 * then return bytes sent, otherwise return the error.
3708 			 */
3709 			if (error) {
3710 				m_freem(m0);
3711 				goto done3;
3712 			}
3713 			goto retry_space;
3714 		}
3715 
3716 		struct mbuf *control = NULL;
3717 		{
3718 			/*
3719 			 * Socket filter processing
3720 			 */
3721 
3722 			error = sflt_data_out(so, NULL, &m0, &control, 0);
3723 			if (error) {
3724 				if (error == EJUSTRETURN) {
3725 					error = 0;
3726 					continue;
3727 				}
3728 				goto done3;
3729 			}
3730 			/*
3731 			 * End Socket filter processing
3732 			 */
3733 		}
3734 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3735 		    uap->s, 0, 0, 0, 0);
3736 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3737 		    0, control, p);
3738 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3739 		    uap->s, 0, 0, 0, 0);
3740 		if (error) {
3741 			goto done3;
3742 		}
3743 	}
3744 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3745 	/*
3746 	 * Send trailers. Wimp out and use writev(2).
3747 	 */
3748 	if (uap->hdtr != USER_ADDR_NULL &&
3749 	    user_hdtr.trailers != USER_ADDR_NULL) {
3750 		bzero(&nuap, sizeof(struct writev_args));
3751 		nuap.fd = uap->s;
3752 		nuap.iovp = user_hdtr.trailers;
3753 		nuap.iovcnt = user_hdtr.trl_cnt;
3754 		error = writev_nocancel(p, &nuap, &writev_retval);
3755 		if (error) {
3756 			goto done2;
3757 		}
3758 		sbytes += writev_retval;
3759 	}
3760 done2:
3761 	file_drop(uap->s);
3762 done1:
3763 	file_drop(uap->fd);
3764 done:
3765 	if (uap->nbytes != USER_ADDR_NULL) {
3766 		/* XXX this appears bogus for some early failure conditions */
3767 		copyout(&sbytes, uap->nbytes, sizeof(off_t));
3768 	}
3769 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3770 	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3771 	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3772 	return error;
3773 done3:
3774 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3775 	goto done2;
3776 }
3777 
3778 
3779 #endif /* SENDFILE */
3780