xref: /xnu-8019.80.24/bsd/kern/uipc_syscalls.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * sendfile(2) and related extensions:
33  * Copyright (c) 1998, David Greenman. All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64  */
65 /*
66  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67  * support for mandatory and extensible security protections.  This notice
68  * is included in support of clause 2.2 (b) of the Apple Public License,
69  * Version 2.0.
70  */
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94 
95 #include <security/audit/audit.h>
96 
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102 
103 #include <os/log.h>
104 #include <os/ptrtools.h>
105 
106 #include <os/log.h>
107 
108 #if CONFIG_MACF_SOCKET_SUBSET
109 #include <security/mac_framework.h>
110 #endif /* MAC_SOCKET_SUBSET */
111 
/*
 * Shorthands: applied to an object with an fp_glob member, `x->f_flag` and
 * `x->f_ops` expand to `x->fp_glob->fg_flag` and `x->fp_glob->fg_ops`.
 */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops

/*
 * Trace codes for this file, all built with NETDBG_CODE() on DBG_NETSOCK.
 * The DBG_FNC_* codes carry a per-function number in bits 8 and up.
 */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG or DEVELOPMENT builds DEBUG_KERNEL_ADDRPERM() yields its argument
 * unchanged and DBG_PRINTF() forwards to printf().  On other builds the
 * argument goes through VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to an
 * empty statement.
 */
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
139 
/*
 * Forward declarations of the file-local (static) helpers used by the
 * system call handlers in this file.
 */

/* Common send/receive paths and address copy-in helpers. */
static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
    int, int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    int32_t *);
static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
/* connectx()/disconnectx() workers and the shared socket-creation path. */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr *,
    struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

/* Conversion of user_msghdr_x arrays to and from kernel form. */
static int internalize_user_msghdr_array(const void *, int, int, u_int,
    struct user_msghdr_x *, struct uio **);
static u_int externalize_user_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct uio **);

/* Helpers for the uio and recv_msg_elem arrays. */
static void free_uio_array(struct uio **, u_int);
static boolean_t uio_array_is_valid(struct uio **, u_int);
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
static u_int externalize_recv_msghdr_array(struct proc *, struct socket *, void *, u_int,
    struct user_msghdr_x *, struct recv_msg_elem *, int *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);
175 
SYSCTL_DECL(_kern_ipc);

/*
 * Read-write unsigned sysctls registered under the _kern_ipc node as
 * "maxsendmsgx" and "maxrecvmsgx"; both start at 100.
 * NOTE(review): presumably these cap the batch size of the sendmsg_x and
 * recvmsg_x calls -- confirm against their users, which are not in view here.
 */
static u_int somaxsendmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
static u_int somaxrecvmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");

/* Read-only unsigned sysctl "missingpktinfo" under _kern_ipc; starts at 0. */
static u_int missingpktinfo = 0;
SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
    CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");

/*
 * System call interface to the socket abstraction.
 */

/* File operations vector installed on every socket-backed fileproc below. */
extern const struct fileops socketops;
194 
195 /*
196  * Returns:	0			Success
197  *		EACCES			Mandatory Access Control failure
198  *	falloc:ENFILE
199  *	falloc:EMFILE
200  *	falloc:ENOMEM
201  *	socreate:EAFNOSUPPORT
202  *	socreate:EPROTOTYPE
203  *	socreate:EPROTONOSUPPORT
204  *	socreate:ENOBUFS
205  *	socreate:ENOMEM
206  *	socreate:???			[other protocol families, IPSEC]
207  */
208 int
socket(struct proc * p,struct socket_args * uap,int32_t * retval)209 socket(struct proc *p,
210     struct socket_args *uap,
211     int32_t *retval)
212 {
213 	return socket_common(p, uap->domain, uap->type, uap->protocol,
214 	           proc_selfpid(), retval, 0);
215 }
216 
217 int
socket_delegate(struct proc * p,struct socket_delegate_args * uap,int32_t * retval)218 socket_delegate(struct proc *p,
219     struct socket_delegate_args *uap,
220     int32_t *retval)
221 {
222 	return socket_common(p, uap->domain, uap->type, uap->protocol,
223 	           uap->epid, retval, 1);
224 }
225 
226 static int
socket_common(struct proc * p,int domain,int type,int protocol,pid_t epid,int32_t * retval,int delegate)227 socket_common(struct proc *p,
228     int domain,
229     int type,
230     int protocol,
231     pid_t epid,
232     int32_t *retval,
233     int delegate)
234 {
235 	struct socket *so;
236 	struct fileproc *fp;
237 	int fd, error;
238 
239 	AUDIT_ARG(socket, domain, type, protocol);
240 #if CONFIG_MACF_SOCKET_SUBSET
241 	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
242 	    type, protocol)) != 0) {
243 		return error;
244 	}
245 #endif /* MAC_SOCKET_SUBSET */
246 
247 	if (delegate) {
248 		error = priv_check_cred(kauth_cred_get(),
249 		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
250 		if (error) {
251 			return EACCES;
252 		}
253 	}
254 
255 	error = falloc(p, &fp, &fd, vfs_context_current());
256 	if (error) {
257 		return error;
258 	}
259 	fp->f_flag = FREAD | FWRITE;
260 	fp->f_ops = &socketops;
261 
262 	if (delegate) {
263 		error = socreate_delegate(domain, &so, type, protocol, epid);
264 	} else {
265 		error = socreate(domain, &so, type, protocol);
266 	}
267 
268 	if (error) {
269 		fp_free(p, fd, fp);
270 	} else {
271 		fp_set_data(fp, so);
272 
273 		proc_fdlock(p);
274 		procfdtbl_releasefd(p, fd, NULL);
275 
276 		fp_drop(p, fd, fp, 1);
277 		proc_fdunlock(p);
278 
279 		*retval = fd;
280 		if (ENTR_SHOULDTRACE) {
281 			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
282 			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
283 		}
284 	}
285 	return error;
286 }
287 
288 /*
289  * Returns:	0			Success
290  *		EDESTADDRREQ		Destination address required
291  *		EBADF			Bad file descriptor
292  *		EACCES			Mandatory Access Control failure
293  *	file_socket:ENOTSOCK
294  *	file_socket:EBADF
295  *	getsockaddr:ENAMETOOLONG	Filename too long
296  *	getsockaddr:EINVAL		Invalid argument
297  *	getsockaddr:ENOMEM		Not enough space
298  *	getsockaddr:EFAULT		Bad address
299  *	sobindlock:???
300  */
301 /* ARGSUSED */
302 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_t * retval)303 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
304 {
305 	struct sockaddr_storage ss;
306 	struct sockaddr *sa = NULL;
307 	struct socket *so;
308 	boolean_t want_free = TRUE;
309 	int error;
310 
311 	AUDIT_ARG(fd, uap->s);
312 	error = file_socket(uap->s, &so);
313 	if (error != 0) {
314 		return error;
315 	}
316 	if (so == NULL) {
317 		error = EBADF;
318 		goto out;
319 	}
320 	if (uap->name == USER_ADDR_NULL) {
321 		error = EDESTADDRREQ;
322 		goto out;
323 	}
324 	if (uap->namelen > sizeof(ss)) {
325 		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
326 	} else {
327 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
328 		if (error == 0) {
329 			sa = (struct sockaddr *)&ss;
330 			want_free = FALSE;
331 		}
332 	}
333 	if (error != 0) {
334 		goto out;
335 	}
336 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
337 #if CONFIG_MACF_SOCKET_SUBSET
338 	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
339 	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
340 		error = sobindlock(so, sa, 1);  /* will lock socket */
341 	}
342 #else
343 	error = sobindlock(so, sa, 1);          /* will lock socket */
344 #endif /* MAC_SOCKET_SUBSET */
345 	if (want_free) {
346 		free_sockaddr(sa);
347 	}
348 out:
349 	file_drop(uap->s);
350 	return error;
351 }
352 
353 /*
354  * Returns:	0			Success
355  *		EBADF
356  *		EACCES			Mandatory Access Control failure
357  *	file_socket:ENOTSOCK
358  *	file_socket:EBADF
359  *	solisten:EINVAL
360  *	solisten:EOPNOTSUPP
361  *	solisten:???
362  */
363 int
listen(__unused struct proc * p,struct listen_args * uap,__unused int32_t * retval)364 listen(__unused struct proc *p, struct listen_args *uap,
365     __unused int32_t *retval)
366 {
367 	int error;
368 	struct socket *so;
369 
370 	AUDIT_ARG(fd, uap->s);
371 	error = file_socket(uap->s, &so);
372 	if (error) {
373 		return error;
374 	}
375 	if (so != NULL)
376 #if CONFIG_MACF_SOCKET_SUBSET
377 	{
378 		error = mac_socket_check_listen(kauth_cred_get(), so);
379 		if (error == 0) {
380 			error = solisten(so, uap->backlog);
381 		}
382 	}
383 #else
384 	{ error = solisten(so, uap->backlog);}
385 #endif /* MAC_SOCKET_SUBSET */
386 	else {
387 		error = EBADF;
388 	}
389 
390 	file_drop(uap->s);
391 	return error;
392 }
393 
/*
 * accept() worker without the cancellation point.
 *
 * Waits (unless the listening socket is non-blocking) for a completed
 * connection on the socket behind uap->s, removes it from the listener's
 * completed queue, allocates a new file descriptor for it, and optionally
 * copies the peer address and its length out to uap->name / uap->anamelen.
 * *retval is set to -1 on entry and to the new descriptor once falloc()
 * has succeeded.
 *
 * Returns:	fp_get_ftype:EBADF	Bad file descriptor
 *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
 *		:EFAULT			Bad address on copyin/copyout
 *		:EBADF			Bad file descriptor
 *		:EOPNOTSUPP		Operation not supported on socket
 *		:EINVAL			Invalid argument
 *		:EWOULDBLOCK		Operation would block
 *		:ECONNABORTED		Connection aborted
 *		:EINTR			Interrupted function
 *		:EACCES			Mandatory Access Control failure
 *		falloc:ENFILE		Too many files open in system
 *		falloc:EMFILE		Too many open files
 *		falloc:ENOMEM		Not enough space
 *		0			Success
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    int32_t *retval)
{
	struct fileproc *fp;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so = NULL;
	lck_mtx_t *mutex_held;
	int fd = uap->s;
	int newfd;
	unsigned int fflag;
	int dosocklock = 0;

	*retval = -1;

	AUDIT_ARG(fd, uap->s);

	/* The caller's buffer length is only needed when an address is wanted. */
	if (uap->name) {
		error = copyin(uap->anamelen, (caddr_t)&namelen,
		    sizeof(socklen_t));
		if (error) {
			return error;
		}
	}
	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
	if (error) {
		return error;
	}
	head = (struct socket *)fp_get_data(fp);

#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	socket_lock(head, 1);

	/*
	 * Pick the mutex to sleep on: the protocol's own lock when it supplies
	 * pr_getlock, otherwise the domain mutex.  dosocklock records which
	 * case applies; it later decides whether the new socket is locked
	 * separately.
	 */
	if (head->so_proto->pr_getlock != NULL) {
		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
		dosocklock = 1;
	} else {
		mutex_held = head->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
			error = EOPNOTSUPP;
		} else {
			/* POSIX: The socket is not accepting connections */
			error = EINVAL;
		}
		socket_unlock(head, 1);
		goto out;
	}
check_again:
	/* Non-blocking listener with nothing queued: fail instead of sleeping. */
	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
		socket_unlock(head, 1);
		error = EWOULDBLOCK;
		goto out;
	}
	/* Sleep until a connection completes or an error is posted on head. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if (head->so_usecount < 1) {
			panic("accept: head=%p refcount=%d", head,
			    head->so_usecount);
		}
		error = msleep((caddr_t)&head->so_timeo, mutex_held,
		    PSOCK | PCATCH, "accept", 0);
		if (head->so_usecount < 1) {
			panic("accept: 2 head=%p refcount=%d", head,
			    head->so_usecount);
		}
		if ((head->so_state & SS_DRAINING)) {
			error = ECONNABORTED;
		}
		if (error) {
			socket_unlock(head, 1);
			goto out;
		}
	}
	/* A pending error on the listener is returned once and then cleared. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		socket_unlock(head, 1);
		goto out;
	}

	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	so_acquire_accept_list(head, NULL);
	/* Re-check after acquiring the accept list; go back and wait if it is now empty. */
	if (TAILQ_EMPTY(&head->so_comp)) {
		so_release_accept_list(head);
		goto check_again;
	}

	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	so->so_head = NULL;
	so->so_state &= ~SS_COMP;
	head->so_qlen--;
	so_release_accept_list(head);

	/* unlock head to avoid deadlock with select, keep a ref on head */
	socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Pass the pre-accepted socket to the MAC framework. This is
	 * cheaper than allocating a file descriptor for the socket,
	 * calling the protocol accept callback, and possibly freeing
	 * the file descriptor should the MAC check fail.
	 */
	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Drop reference on listening socket */
		sodereference(head);
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
		/* Drop reference on listening socket */
		sodereference(head);
		/* Propagate socket filter's error code to the caller */
		goto out;
	}

	/* Save the listener's file flags; fp is reused for the new descriptor. */
	fflag = fp->f_flag;
	error = falloc(p, &fp, &newfd, vfs_context_current());
	if (error) {
		/*
		 * Probably ran out of file descriptors.
		 *
		 * <rdar://problem/8554930>
		 * Don't put this back on the socket like we used to, that
		 * just causes the client to spin. Drop the socket.
		 */
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		sodereference(head);
		goto out;
	}
	*retval = newfd;
	fp->f_flag = fflag;
	fp->f_ops = &socketops;
	fp_set_data(fp, so);

	socket_lock(head, 0);
	if (dosocklock) {
		socket_lock(so, 1);
	}

	/* Sync socket non-blocking/async state with file flags */
	if (fp->f_flag & FNONBLOCK) {
		so->so_state |= SS_NBIO;
	} else {
		so->so_state &= ~SS_NBIO;
	}

	if (fp->f_flag & FASYNC) {
		so->so_state |= SS_ASYNC;
		so->so_rcv.sb_flags |= SB_ASYNC;
		so->so_snd.sb_flags |= SB_ASYNC;
	} else {
		so->so_state &= ~SS_ASYNC;
		so->so_rcv.sb_flags &= ~SB_ASYNC;
		so->so_snd.sb_flags &= ~SB_ASYNC;
	}

	(void) soacceptlock(so, &sa, 0);
	socket_unlock(head, 1);
	/* No peer address: report a zero length if the caller asked for one. */
	if (sa == NULL) {
		namelen = 0;
		if (uap->name) {
			goto gotnoname;
		}
		error = 0;
		goto releasefd;
	}
	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

	if (uap->name) {
		socklen_t       sa_len;

		/* save sa_len before it is destroyed */
		sa_len = sa->sa_len;
		namelen = MIN(namelen, sa_len);
		/*
		 * NOTE(review): the result of this copyout is overwritten by
		 * the length copyout below, so a failure to copy the address
		 * is not reported on its own -- confirm this is intended.
		 */
		error = copyout(sa, uap->name, namelen);
		if (!error) {
			/* return the actual, untruncated address length */
			namelen = sa_len;
		}
gotnoname:
		error = copyout((caddr_t)&namelen, uap->anamelen,
		    sizeof(socklen_t));
	}
	free_sockaddr(sa);

releasefd:
	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}

	if (dosocklock) {
		socket_unlock(so, 1);
	}

	proc_fdlock(p);
	procfdtbl_releasefd(p, newfd, NULL);
	fp_drop(p, newfd, fp, 1);
	proc_fdunlock(p);

out:
	file_drop(fd);

	/* newfd is only read here when error is 0, i.e. after falloc() set it. */
	if (error == 0 && ENTR_SHOULDTRACE) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
	}
	return error;
}
659 
660 int
accept(struct proc * p,struct accept_args * uap,int32_t * retval)661 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
662 {
663 	__pthread_testcancel(1);
664 	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
665 	           retval);
666 }
667 
668 /*
669  * Returns:	0			Success
670  *		EBADF			Bad file descriptor
671  *		EALREADY		Connection already in progress
672  *		EINPROGRESS		Operation in progress
673  *		ECONNABORTED		Connection aborted
674  *		EINTR			Interrupted function
675  *		EACCES			Mandatory Access Control failure
676  *	file_socket:ENOTSOCK
677  *	file_socket:EBADF
678  *	getsockaddr:ENAMETOOLONG	Filename too long
679  *	getsockaddr:EINVAL		Invalid argument
680  *	getsockaddr:ENOMEM		Not enough space
681  *	getsockaddr:EFAULT		Bad address
682  *	soconnectlock:EOPNOTSUPP
683  *	soconnectlock:EISCONN
684  *	soconnectlock:???		[depends on protocol, filters]
685  *	msleep:EINTR
686  *
687  * Imputed:	so_error		error may be set from so_error, which
688  *					may have been set by soconnectlock.
689  */
690 /* ARGSUSED */
691 int
connect(struct proc * p,struct connect_args * uap,int32_t * retval)692 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
693 {
694 	__pthread_testcancel(1);
695 	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
696 	           retval);
697 }
698 
699 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_t * retval)700 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
701 {
702 #pragma unused(p, retval)
703 	struct socket *so;
704 	struct sockaddr_storage ss;
705 	struct sockaddr *sa = NULL;
706 	int error;
707 	int fd = uap->s;
708 	boolean_t dgram;
709 
710 	AUDIT_ARG(fd, uap->s);
711 	error = file_socket(fd, &so);
712 	if (error != 0) {
713 		return error;
714 	}
715 	if (so == NULL) {
716 		error = EBADF;
717 		goto out;
718 	}
719 
720 	/*
721 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
722 	 * if this is a datagram socket; translate for other types.
723 	 */
724 	dgram = (so->so_type == SOCK_DGRAM);
725 
726 	/* Get socket address now before we obtain socket lock */
727 	if (uap->namelen > sizeof(ss)) {
728 		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
729 	} else {
730 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
731 		if (error == 0) {
732 			sa = (struct sockaddr *)&ss;
733 		}
734 	}
735 	if (error != 0) {
736 		goto out;
737 	}
738 
739 	error = connectit(so, sa);
740 
741 	if (sa != NULL && sa != SA(&ss)) {
742 		free_sockaddr(sa);
743 	}
744 	if (error == ERESTART) {
745 		error = EINTR;
746 	}
747 out:
748 	file_drop(fd);
749 	return error;
750 }
751 
752 static int
connectx_nocancel(struct proc * p,struct connectx_args * uap,int * retval)753 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
754 {
755 #pragma unused(p, retval)
756 	struct sockaddr_storage ss, sd;
757 	struct sockaddr *src = NULL, *dst = NULL;
758 	struct socket *so;
759 	int error, error1, fd = uap->socket;
760 	boolean_t dgram;
761 	sae_connid_t cid = SAE_CONNID_ANY;
762 	struct user32_sa_endpoints ep32;
763 	struct user64_sa_endpoints ep64;
764 	struct user_sa_endpoints ep;
765 	user_ssize_t bytes_written = 0;
766 	struct user_iovec *iovp;
767 	uio_t auio = NULL;
768 
769 	AUDIT_ARG(fd, uap->socket);
770 	error = file_socket(fd, &so);
771 	if (error != 0) {
772 		return error;
773 	}
774 	if (so == NULL) {
775 		error = EBADF;
776 		goto out;
777 	}
778 
779 	if (uap->endpoints == USER_ADDR_NULL) {
780 		error = EINVAL;
781 		goto out;
782 	}
783 
784 	if (IS_64BIT_PROCESS(p)) {
785 		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
786 		if (error != 0) {
787 			goto out;
788 		}
789 
790 		ep.sae_srcif = ep64.sae_srcif;
791 		ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
792 		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
793 		ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
794 		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
795 	} else {
796 		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
797 		if (error != 0) {
798 			goto out;
799 		}
800 
801 		ep.sae_srcif = ep32.sae_srcif;
802 		ep.sae_srcaddr = ep32.sae_srcaddr;
803 		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
804 		ep.sae_dstaddr = ep32.sae_dstaddr;
805 		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
806 	}
807 
808 	/*
809 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
810 	 * if this is a datagram socket; translate for other types.
811 	 */
812 	dgram = (so->so_type == SOCK_DGRAM);
813 
814 	/* Get socket address now before we obtain socket lock */
815 	if (ep.sae_srcaddr != USER_ADDR_NULL) {
816 		if (ep.sae_srcaddrlen > sizeof(ss)) {
817 			error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
818 		} else {
819 			error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
820 			if (error == 0) {
821 				src = (struct sockaddr *)&ss;
822 			}
823 		}
824 
825 		if (error) {
826 			goto out;
827 		}
828 	}
829 
830 	if (ep.sae_dstaddr == USER_ADDR_NULL) {
831 		error = EINVAL;
832 		goto out;
833 	}
834 
835 	/* Get socket address now before we obtain socket lock */
836 	if (ep.sae_dstaddrlen > sizeof(sd)) {
837 		error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
838 	} else {
839 		error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
840 		if (error == 0) {
841 			dst = (struct sockaddr *)&sd;
842 		}
843 	}
844 
845 	if (error) {
846 		goto out;
847 	}
848 
849 	VERIFY(dst != NULL);
850 
851 	if (uap->iov != USER_ADDR_NULL) {
852 		/* Verify range before calling uio_create() */
853 		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
854 			error = EINVAL;
855 			goto out;
856 		}
857 
858 		if (uap->len == USER_ADDR_NULL) {
859 			error = EINVAL;
860 			goto out;
861 		}
862 
863 		/* allocate a uio to hold the number of iovecs passed */
864 		auio = uio_create(uap->iovcnt, 0,
865 		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
866 		    UIO_WRITE);
867 
868 		if (auio == NULL) {
869 			error = ENOMEM;
870 			goto out;
871 		}
872 
873 		/*
874 		 * get location of iovecs within the uio.
875 		 * then copyin the iovecs from user space.
876 		 */
877 		iovp = uio_iovsaddr(auio);
878 		if (iovp == NULL) {
879 			error = ENOMEM;
880 			goto out;
881 		}
882 		error = copyin_user_iovec_array(uap->iov,
883 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
884 		    uap->iovcnt, iovp);
885 		if (error != 0) {
886 			goto out;
887 		}
888 
889 		/* finish setup of uio_t */
890 		error = uio_calculateresid(auio);
891 		if (error != 0) {
892 			goto out;
893 		}
894 	}
895 
896 	error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
897 	    &cid, auio, uap->flags, &bytes_written);
898 	if (error == ERESTART) {
899 		error = EINTR;
900 	}
901 
902 	if (uap->len != USER_ADDR_NULL) {
903 		error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
904 		/* give precedence to connectitx errors */
905 		if ((error1 != 0) && (error == 0)) {
906 			error = error1;
907 		}
908 	}
909 
910 	if (uap->connid != USER_ADDR_NULL) {
911 		error1 = copyout(&cid, uap->connid, sizeof(cid));
912 		/* give precedence to connectitx errors */
913 		if ((error1 != 0) && (error == 0)) {
914 			error = error1;
915 		}
916 	}
917 out:
918 	file_drop(fd);
919 	if (auio != NULL) {
920 		uio_free(auio);
921 	}
922 	if (src != NULL && src != SA(&ss)) {
923 		free_sockaddr(src);
924 	}
925 	if (dst != NULL && dst != SA(&sd)) {
926 		free_sockaddr(dst);
927 	}
928 	return error;
929 }
930 
/*
 * connectx() system call handler.
 *
 * Because of its similarity to a POSIX interface this is treated as an
 * unofficial cancellation point: the pthread cancellation check runs first,
 * then connectx_nocancel() does the work.
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = connectx_nocancel(p, uap, retval);
	return error;
}
941 
942 static int
connectit(struct socket * so,struct sockaddr * sa)943 connectit(struct socket *so, struct sockaddr *sa)
944 {
945 	int error;
946 
947 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
948 #if CONFIG_MACF_SOCKET_SUBSET
949 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
950 		return error;
951 	}
952 #endif /* MAC_SOCKET_SUBSET */
953 
954 	socket_lock(so, 1);
955 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
956 		error = EALREADY;
957 		goto out;
958 	}
959 	error = soconnectlock(so, sa, 0);
960 	if (error != 0) {
961 		goto out;
962 	}
963 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
964 		error = EINPROGRESS;
965 		goto out;
966 	}
967 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
968 		lck_mtx_t *mutex_held;
969 
970 		if (so->so_proto->pr_getlock != NULL) {
971 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
972 		} else {
973 			mutex_held = so->so_proto->pr_domain->dom_mtx;
974 		}
975 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
976 		    PSOCK | PCATCH, __func__, 0);
977 		if (so->so_state & SS_DRAINING) {
978 			error = ECONNABORTED;
979 		}
980 		if (error != 0) {
981 			break;
982 		}
983 	}
984 	if (error == 0) {
985 		error = so->so_error;
986 		so->so_error = 0;
987 	}
988 out:
989 	socket_unlock(so, 1);
990 	return error;
991 }
992 
993 static int
connectitx(struct socket * so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)994 connectitx(struct socket *so, struct sockaddr *src,
995     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
996     sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
997     user_ssize_t *bytes_written)
998 {
999 	int error;
1000 
1001 	VERIFY(dst != NULL);
1002 
1003 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1004 #if CONFIG_MACF_SOCKET_SUBSET
1005 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1006 		return error;
1007 	}
1008 
1009 	if (auio != NULL) {
1010 		if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1011 			return error;
1012 		}
1013 	}
1014 #endif /* MAC_SOCKET_SUBSET */
1015 
1016 	socket_lock(so, 1);
1017 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1018 		error = EALREADY;
1019 		goto out;
1020 	}
1021 
1022 	error = soconnectxlocked(so, src, dst, p, ifscope,
1023 	    aid, pcid, flags, NULL, 0, auio, bytes_written);
1024 	if (error != 0) {
1025 		goto out;
1026 	}
1027 	/*
1028 	 * If, after the call to soconnectxlocked the flag is still set (in case
1029 	 * data has been queued and the connect() has actually been triggered,
1030 	 * it will have been unset by the transport), we exit immediately. There
1031 	 * is no reason to wait on any event.
1032 	 */
1033 	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1034 		error = 0;
1035 		goto out;
1036 	}
1037 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1038 		error = EINPROGRESS;
1039 		goto out;
1040 	}
1041 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1042 		lck_mtx_t *mutex_held;
1043 
1044 		if (so->so_proto->pr_getlock != NULL) {
1045 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1046 		} else {
1047 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1048 		}
1049 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1050 		    PSOCK | PCATCH, __func__, 0);
1051 		if (so->so_state & SS_DRAINING) {
1052 			error = ECONNABORTED;
1053 		}
1054 		if (error != 0) {
1055 			break;
1056 		}
1057 	}
1058 	if (error == 0) {
1059 		error = so->so_error;
1060 		so->so_error = 0;
1061 	}
1062 out:
1063 	socket_unlock(so, 1);
1064 	return error;
1065 }
1066 
/*
 * peeloff
 *
 * Ignores all of its arguments; it only acts as a cancellation point
 * and then reports success.
 *
 * Returns:	0			Always
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return 0;
}
1078 
/*
 * disconnectx
 *
 * Cancellation-point wrapper around disconnectx_nocancel().
 *
 * Returns:	disconnectx_nocancel:???
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1089 
1090 static int
disconnectx_nocancel(struct proc * p,struct disconnectx_args * uap,int * retval)1091 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1092 {
1093 #pragma unused(p, retval)
1094 	struct socket *so;
1095 	int fd = uap->s;
1096 	int error;
1097 
1098 	error = file_socket(fd, &so);
1099 	if (error != 0) {
1100 		return error;
1101 	}
1102 	if (so == NULL) {
1103 		error = EBADF;
1104 		goto out;
1105 	}
1106 
1107 	error = sodisconnectx(so, uap->aid, uap->cid);
1108 out:
1109 	file_drop(fd);
1110 	return error;
1111 }
1112 
1113 /*
1114  * Returns:	0			Success
1115  *	socreate:EAFNOSUPPORT
1116  *	socreate:EPROTOTYPE
1117  *	socreate:EPROTONOSUPPORT
1118  *	socreate:ENOBUFS
1119  *	socreate:ENOMEM
1120  *	socreate:EISCONN
1121  *	socreate:???			[other protocol families, IPSEC]
1122  *	falloc:ENFILE
1123  *	falloc:EMFILE
1124  *	falloc:ENOMEM
1125  *	copyout:EFAULT
1126  *	soconnect2:EINVAL
1127  *	soconnect2:EPROTOTYPE
1128  *	soconnect2:???			[other protocol families[
1129  */
1130 int
socketpair(struct proc * p,struct socketpair_args * uap,__unused int32_t * retval)1131 socketpair(struct proc *p, struct socketpair_args *uap,
1132     __unused int32_t *retval)
1133 {
1134 	struct fileproc *fp1, *fp2;
1135 	struct socket *so1, *so2;
1136 	int fd, error, sv[2];
1137 
1138 	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1139 	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1140 	if (error) {
1141 		return error;
1142 	}
1143 	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1144 	if (error) {
1145 		goto free1;
1146 	}
1147 
1148 	error = falloc(p, &fp1, &fd, vfs_context_current());
1149 	if (error) {
1150 		goto free2;
1151 	}
1152 	fp1->f_flag = FREAD | FWRITE;
1153 	fp1->f_ops = &socketops;
1154 	fp_set_data(fp1, so1);
1155 	sv[0] = fd;
1156 
1157 	error = falloc(p, &fp2, &fd, vfs_context_current());
1158 	if (error) {
1159 		goto free3;
1160 	}
1161 	fp2->f_flag = FREAD | FWRITE;
1162 	fp2->f_ops = &socketops;
1163 	fp_set_data(fp2, so2);
1164 	sv[1] = fd;
1165 
1166 	error = soconnect2(so1, so2);
1167 	if (error) {
1168 		goto free4;
1169 	}
1170 	if (uap->type == SOCK_DGRAM) {
1171 		/*
1172 		 * Datagram socket connection is asymmetric.
1173 		 */
1174 		error = soconnect2(so2, so1);
1175 		if (error) {
1176 			goto free4;
1177 		}
1178 	}
1179 
1180 	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1181 		goto free4;
1182 	}
1183 
1184 	proc_fdlock(p);
1185 	procfdtbl_releasefd(p, sv[0], NULL);
1186 	procfdtbl_releasefd(p, sv[1], NULL);
1187 	fp_drop(p, sv[0], fp1, 1);
1188 	fp_drop(p, sv[1], fp2, 1);
1189 	proc_fdunlock(p);
1190 
1191 	return 0;
1192 free4:
1193 	fp_free(p, sv[1], fp2);
1194 free3:
1195 	fp_free(p, sv[0], fp1);
1196 free2:
1197 	(void) soclose(so2);
1198 free1:
1199 	(void) soclose(so1);
1200 	return error;
1201 }
1202 
1203 /*
1204  * Returns:	0			Success
1205  *		EINVAL
1206  *		ENOBUFS
1207  *		EBADF
1208  *		EPIPE
1209  *		EACCES			Mandatory Access Control failure
1210  *	file_socket:ENOTSOCK
1211  *	file_socket:EBADF
1212  *	getsockaddr:ENAMETOOLONG	Filename too long
1213  *	getsockaddr:EINVAL		Invalid argument
1214  *	getsockaddr:ENOMEM		Not enough space
1215  *	getsockaddr:EFAULT		Bad address
1216  *	<pru_sosend>:EACCES[TCP]
1217  *	<pru_sosend>:EADDRINUSE[TCP]
1218  *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1219  *	<pru_sosend>:EAFNOSUPPORT[TCP]
1220  *	<pru_sosend>:EAGAIN[TCP]
1221  *	<pru_sosend>:EBADF
1222  *	<pru_sosend>:ECONNRESET[TCP]
1223  *	<pru_sosend>:EFAULT
1224  *	<pru_sosend>:EHOSTUNREACH[TCP]
1225  *	<pru_sosend>:EINTR
1226  *	<pru_sosend>:EINVAL
1227  *	<pru_sosend>:EISCONN[AF_INET]
1228  *	<pru_sosend>:EMSGSIZE[TCP]
1229  *	<pru_sosend>:ENETDOWN[TCP]
1230  *	<pru_sosend>:ENETUNREACH[TCP]
1231  *	<pru_sosend>:ENOBUFS
1232  *	<pru_sosend>:ENOMEM[TCP]
1233  *	<pru_sosend>:ENOTCONN[AF_INET]
1234  *	<pru_sosend>:EOPNOTSUPP
1235  *	<pru_sosend>:EPERM[TCP]
1236  *	<pru_sosend>:EPIPE
1237  *	<pru_sosend>:EWOULDBLOCK
1238  *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1239  *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1240  *	<pru_sosend>:???		[value from so_error]
1241  *	sockargs:???
1242  */
1243 static int
sendit(struct proc * p,struct socket * so,struct user_msghdr * mp,uio_t uiop,int flags,int32_t * retval)1244 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1245     int flags, int32_t *retval)
1246 {
1247 	struct mbuf *control = NULL;
1248 	struct sockaddr_storage ss;
1249 	struct sockaddr *to = NULL;
1250 	boolean_t want_free = TRUE;
1251 	int error;
1252 	user_ssize_t len;
1253 
1254 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1255 
1256 	if (mp->msg_name != USER_ADDR_NULL) {
1257 		if (mp->msg_namelen > sizeof(ss)) {
1258 			error = getsockaddr(so, &to, mp->msg_name,
1259 			    mp->msg_namelen, TRUE);
1260 		} else {
1261 			error = getsockaddr_s(so, &ss, mp->msg_name,
1262 			    mp->msg_namelen, TRUE);
1263 			if (error == 0) {
1264 				to = (struct sockaddr *)&ss;
1265 				want_free = FALSE;
1266 			}
1267 		}
1268 		if (error != 0) {
1269 			goto out;
1270 		}
1271 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1272 	}
1273 	if (mp->msg_control != USER_ADDR_NULL) {
1274 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1275 			error = EINVAL;
1276 			goto bad;
1277 		}
1278 		error = sockargs(&control, mp->msg_control,
1279 		    mp->msg_controllen, MT_CONTROL);
1280 		if (error != 0) {
1281 			goto bad;
1282 		}
1283 	}
1284 
1285 #if CONFIG_MACF_SOCKET_SUBSET
1286 	/*
1287 	 * We check the state without holding the socket lock;
1288 	 * if a race condition occurs, it would simply result
1289 	 * in an extra call to the MAC check function.
1290 	 */
1291 	if (to != NULL &&
1292 	    !(so->so_state & SS_DEFUNCT) &&
1293 	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1294 		goto bad;
1295 	}
1296 #endif /* MAC_SOCKET_SUBSET */
1297 
1298 	len = uio_resid(uiop);
1299 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1300 	    control, flags);
1301 	if (error != 0) {
1302 		if (uio_resid(uiop) != len && (error == ERESTART ||
1303 		    error == EINTR || error == EWOULDBLOCK)) {
1304 			error = 0;
1305 		}
1306 		/* Generation of SIGPIPE can be controlled per socket */
1307 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1308 		    !(flags & MSG_NOSIGNAL)) {
1309 			psignal(p, SIGPIPE);
1310 		}
1311 	}
1312 	if (error == 0) {
1313 		*retval = (int)(len - uio_resid(uiop));
1314 	}
1315 bad:
1316 	if (want_free) {
1317 		free_sockaddr(to);
1318 	}
1319 out:
1320 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1321 
1322 	return error;
1323 }
1324 
1325 /*
1326  * Returns:	0			Success
1327  *		ENOMEM
1328  *	sendit:???			[see sendit definition in this file]
1329  *	write:???			[4056224: applicable for pipes]
1330  */
1331 int
sendto(struct proc * p,struct sendto_args * uap,int32_t * retval)1332 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1333 {
1334 	__pthread_testcancel(1);
1335 	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1336 }
1337 
1338 int
sendto_nocancel(struct proc * p,struct sendto_nocancel_args * uap,int32_t * retval)1339 sendto_nocancel(struct proc *p,
1340     struct sendto_nocancel_args *uap,
1341     int32_t *retval)
1342 {
1343 	struct user_msghdr msg;
1344 	int error;
1345 	uio_t auio = NULL;
1346 	struct socket *so;
1347 
1348 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1349 	AUDIT_ARG(fd, uap->s);
1350 
1351 	if (uap->flags & MSG_SKIPCFIL) {
1352 		error = EPERM;
1353 		goto done;
1354 	}
1355 
1356 	if (uap->len > LONG_MAX) {
1357 		error = EINVAL;
1358 		goto done;
1359 	}
1360 
1361 	auio = uio_create(1, 0,
1362 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1363 	    UIO_WRITE);
1364 	if (auio == NULL) {
1365 		error = ENOMEM;
1366 		goto done;
1367 	}
1368 	uio_addiov(auio, uap->buf, uap->len);
1369 
1370 	msg.msg_name = uap->to;
1371 	msg.msg_namelen = uap->tolen;
1372 	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1373 	msg.msg_iov = 0;
1374 	msg.msg_iovlen = 0;
1375 	msg.msg_control = 0;
1376 	msg.msg_flags = 0;
1377 
1378 	error = file_socket(uap->s, &so);
1379 	if (error) {
1380 		goto done;
1381 	}
1382 
1383 	if (so == NULL) {
1384 		error = EBADF;
1385 	} else {
1386 		error = sendit(p, so, &msg, auio, uap->flags, retval);
1387 	}
1388 
1389 	file_drop(uap->s);
1390 done:
1391 	if (auio != NULL) {
1392 		uio_free(auio);
1393 	}
1394 
1395 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1396 
1397 	return error;
1398 }
1399 
1400 /*
1401  * Returns:	0			Success
1402  *		ENOBUFS
1403  *	copyin:EFAULT
1404  *	sendit:???			[see sendit definition in this file]
1405  */
1406 int
sendmsg(struct proc * p,struct sendmsg_args * uap,int32_t * retval)1407 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1408 {
1409 	__pthread_testcancel(1);
1410 	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1411 	           retval);
1412 }
1413 
1414 int
sendmsg_nocancel(struct proc * p,struct sendmsg_nocancel_args * uap,int32_t * retval)1415 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1416     int32_t *retval)
1417 {
1418 	struct user32_msghdr msg32;
1419 	struct user64_msghdr msg64;
1420 	struct user_msghdr user_msg;
1421 	caddr_t msghdrp;
1422 	int     size_of_msghdr;
1423 	int error;
1424 	uio_t auio = NULL;
1425 	struct user_iovec *iovp;
1426 	struct socket *so;
1427 
1428 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1429 
1430 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1431 	AUDIT_ARG(fd, uap->s);
1432 
1433 	if (uap->flags & MSG_SKIPCFIL) {
1434 		error = EPERM;
1435 		goto done;
1436 	}
1437 
1438 	if (is_p_64bit_process) {
1439 		msghdrp = (caddr_t)&msg64;
1440 		size_of_msghdr = sizeof(msg64);
1441 	} else {
1442 		msghdrp = (caddr_t)&msg32;
1443 		size_of_msghdr = sizeof(msg32);
1444 	}
1445 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1446 	if (error) {
1447 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1448 		return error;
1449 	}
1450 
1451 	if (is_p_64bit_process) {
1452 		user_msg.msg_flags = msg64.msg_flags;
1453 		user_msg.msg_controllen = msg64.msg_controllen;
1454 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
1455 		user_msg.msg_iovlen = msg64.msg_iovlen;
1456 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1457 		user_msg.msg_namelen = msg64.msg_namelen;
1458 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
1459 	} else {
1460 		user_msg.msg_flags = msg32.msg_flags;
1461 		user_msg.msg_controllen = msg32.msg_controllen;
1462 		user_msg.msg_control = msg32.msg_control;
1463 		user_msg.msg_iovlen = msg32.msg_iovlen;
1464 		user_msg.msg_iov = msg32.msg_iov;
1465 		user_msg.msg_namelen = msg32.msg_namelen;
1466 		user_msg.msg_name = msg32.msg_name;
1467 	}
1468 
1469 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1470 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1471 		    0, 0, 0, 0);
1472 		return EMSGSIZE;
1473 	}
1474 
1475 	/* allocate a uio large enough to hold the number of iovecs passed */
1476 	auio = uio_create(user_msg.msg_iovlen, 0,
1477 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1478 	    UIO_WRITE);
1479 	if (auio == NULL) {
1480 		error = ENOBUFS;
1481 		goto done;
1482 	}
1483 
1484 	if (user_msg.msg_iovlen) {
1485 		/*
1486 		 * get location of iovecs within the uio.
1487 		 * then copyin the iovecs from user space.
1488 		 */
1489 		iovp = uio_iovsaddr(auio);
1490 		if (iovp == NULL) {
1491 			error = ENOBUFS;
1492 			goto done;
1493 		}
1494 		error = copyin_user_iovec_array(user_msg.msg_iov,
1495 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1496 		    user_msg.msg_iovlen, iovp);
1497 		if (error) {
1498 			goto done;
1499 		}
1500 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1501 
1502 		/* finish setup of uio_t */
1503 		error = uio_calculateresid(auio);
1504 		if (error) {
1505 			goto done;
1506 		}
1507 	} else {
1508 		user_msg.msg_iov = 0;
1509 	}
1510 
1511 	/* msg_flags is ignored for send */
1512 	user_msg.msg_flags = 0;
1513 
1514 	error = file_socket(uap->s, &so);
1515 	if (error) {
1516 		goto done;
1517 	}
1518 	if (so == NULL) {
1519 		error = EBADF;
1520 	} else {
1521 		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1522 	}
1523 	file_drop(uap->s);
1524 done:
1525 	if (auio != NULL) {
1526 		uio_free(auio);
1527 	}
1528 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1529 
1530 	return error;
1531 }
1532 
1533 int
sendmsg_x(struct proc * p,struct sendmsg_x_args * uap,user_ssize_t * retval)1534 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1535 {
1536 	int error = 0;
1537 	struct user_msghdr_x *user_msg_x = NULL;
1538 	struct uio **uiop = NULL;
1539 	struct socket *so;
1540 	u_int i;
1541 	struct sockaddr *to = NULL;
1542 	user_ssize_t len_before = 0, len_after;
1543 	int need_drop = 0;
1544 	size_t size_of_msghdr;
1545 	void *umsgp = NULL;
1546 	u_int uiocnt;
1547 	int has_addr_or_ctl = 0;
1548 
1549 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1550 
1551 	size_of_msghdr = IS_64BIT_PROCESS(p) ?
1552 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1553 
1554 	if (uap->flags & MSG_SKIPCFIL) {
1555 		error = EPERM;
1556 		goto out;
1557 	}
1558 
1559 	error = file_socket(uap->s, &so);
1560 	if (error) {
1561 		goto out;
1562 	}
1563 	need_drop = 1;
1564 	if (so == NULL) {
1565 		error = EBADF;
1566 		goto out;
1567 	}
1568 
1569 	/*
1570 	 * Input parameter range check
1571 	 */
1572 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1573 		error = EINVAL;
1574 		goto out;
1575 	}
1576 	/*
1577 	 * Clip to max currently allowed
1578 	 */
1579 	if (uap->cnt > somaxsendmsgx) {
1580 		uap->cnt = somaxsendmsgx;
1581 	}
1582 
1583 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
1584 	    Z_WAITOK | Z_ZERO);
1585 	if (user_msg_x == NULL) {
1586 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
1587 		error = ENOMEM;
1588 		goto out;
1589 	}
1590 	uiop = kalloc_type(struct uio *, uap->cnt, Z_WAITOK | Z_ZERO);
1591 	if (uiop == NULL) {
1592 		DBG_PRINTF("%s uiop alloc failed\n", __func__);
1593 		error = ENOMEM;
1594 		goto out;
1595 	}
1596 
1597 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
1598 	if (umsgp == NULL) {
1599 		printf("%s user_msg_x alloc failed\n", __func__);
1600 		error = ENOMEM;
1601 		goto out;
1602 	}
1603 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1604 	if (error) {
1605 		DBG_PRINTF("%s copyin() failed\n", __func__);
1606 		goto out;
1607 	}
1608 	error = internalize_user_msghdr_array(umsgp,
1609 	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1610 	    UIO_WRITE, uap->cnt, user_msg_x, uiop);
1611 	if (error) {
1612 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1613 		goto out;
1614 	}
1615 	/*
1616 	 * Make sure the size of each message iovec and
1617 	 * the aggregate size of all the iovec is valid
1618 	 */
1619 	if (uio_array_is_valid(uiop, uap->cnt) == false) {
1620 		error = EINVAL;
1621 		goto out;
1622 	}
1623 
1624 	/*
1625 	 * Sanity check on passed arguments
1626 	 */
1627 	for (i = 0; i < uap->cnt; i++) {
1628 		struct user_msghdr_x *mp = user_msg_x + i;
1629 
1630 		/*
1631 		 * No flags on send message
1632 		 */
1633 		if (mp->msg_flags != 0) {
1634 			error = EINVAL;
1635 			goto out;
1636 		}
1637 		/*
1638 		 * No support for address or ancillary data (yet)
1639 		 */
1640 		if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1641 			has_addr_or_ctl = 1;
1642 		}
1643 
1644 		if (mp->msg_control != USER_ADDR_NULL ||
1645 		    mp->msg_controllen != 0) {
1646 			has_addr_or_ctl = 1;
1647 		}
1648 
1649 #if CONFIG_MACF_SOCKET_SUBSET
1650 		/*
1651 		 * We check the state without holding the socket lock;
1652 		 * if a race condition occurs, it would simply result
1653 		 * in an extra call to the MAC check function.
1654 		 *
1655 		 * Note: The following check is never true taken with the
1656 		 * current limitation that we do not accept to pass an address,
1657 		 * this is effectively placeholder code. If we add support for
1658 		 * addresses, we will have to check every address.
1659 		 */
1660 		if (to != NULL &&
1661 		    !(so->so_state & SS_DEFUNCT) &&
1662 		    (error = mac_socket_check_send(kauth_cred_get(), so, to))
1663 		    != 0) {
1664 			goto out;
1665 		}
1666 #endif /* MAC_SOCKET_SUBSET */
1667 	}
1668 
1669 	len_before = uio_array_resid(uiop, uap->cnt);
1670 
1671 	/*
1672 	 * Feed list of packets at once only for connected socket without
1673 	 * control message
1674 	 */
1675 	if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1676 	    pru_sosend_list_notsupp &&
1677 	    has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1678 		error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1679 		    uap->cnt, uap->flags);
1680 	} else {
1681 		for (i = 0; i < uap->cnt; i++) {
1682 			struct user_msghdr_x *mp = user_msg_x + i;
1683 			struct user_msghdr user_msg;
1684 			uio_t auio = uiop[i];
1685 			int32_t tmpval;
1686 
1687 			user_msg.msg_flags = mp->msg_flags;
1688 			user_msg.msg_controllen = mp->msg_controllen;
1689 			user_msg.msg_control = mp->msg_control;
1690 			user_msg.msg_iovlen = mp->msg_iovlen;
1691 			user_msg.msg_iov = mp->msg_iov;
1692 			user_msg.msg_namelen = mp->msg_namelen;
1693 			user_msg.msg_name = mp->msg_name;
1694 
1695 			error = sendit(p, so, &user_msg, auio, uap->flags,
1696 			    &tmpval);
1697 			if (error != 0) {
1698 				break;
1699 			}
1700 		}
1701 	}
1702 	len_after = uio_array_resid(uiop, uap->cnt);
1703 
1704 	VERIFY(len_after <= len_before);
1705 
1706 	if (error != 0) {
1707 		if (len_after != len_before && (error == ERESTART ||
1708 		    error == EINTR || error == EWOULDBLOCK ||
1709 		    error == ENOBUFS)) {
1710 			error = 0;
1711 		}
1712 		/* Generation of SIGPIPE can be controlled per socket */
1713 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1714 		    !(uap->flags & MSG_NOSIGNAL)) {
1715 			psignal(p, SIGPIPE);
1716 		}
1717 	}
1718 	if (error == 0) {
1719 		uiocnt = externalize_user_msghdr_array(umsgp,
1720 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1721 		    UIO_WRITE, uap->cnt, user_msg_x, uiop);
1722 
1723 		*retval = (int)(uiocnt);
1724 	}
1725 out:
1726 	if (need_drop) {
1727 		file_drop(uap->s);
1728 	}
1729 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
1730 	if (uiop != NULL) {
1731 		free_uio_array(uiop, uap->cnt);
1732 		kfree_type(struct uio *, uap->cnt, uiop);
1733 	}
1734 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
1735 
1736 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1737 
1738 	return error;
1739 }
1740 
1741 
1742 static int
copyout_sa(struct sockaddr * fromsa,user_addr_t name,socklen_t * namelen)1743 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1744 {
1745 	int error = 0;
1746 	socklen_t sa_len = 0;
1747 	ssize_t len;
1748 
1749 	len = *namelen;
1750 	if (len <= 0 || fromsa == 0) {
1751 		len = 0;
1752 	} else {
1753 #ifndef MIN
1754 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1755 #endif
1756 		sa_len = fromsa->sa_len;
1757 		len = MIN((unsigned int)len, sa_len);
1758 		error = copyout(fromsa, name, (unsigned)len);
1759 		if (error) {
1760 			goto out;
1761 		}
1762 	}
1763 	*namelen = sa_len;
1764 out:
1765 	return 0;
1766 }
1767 
1768 static int
copyout_control(struct proc * p,struct mbuf * m,user_addr_t control,socklen_t * controllen,int * flags,struct socket * so)1769 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1770     socklen_t *controllen, int *flags, struct socket *so)
1771 {
1772 	int error = 0;
1773 	socklen_t len;
1774 	user_addr_t ctlbuf;
1775 	struct inpcb *inp = NULL;
1776 	bool want_pktinfo = false;
1777 	bool seen_pktinfo = false;
1778 
1779 	if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
1780 		inp = sotoinpcb(so);
1781 		want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
1782 	}
1783 
1784 	len = *controllen;
1785 	*controllen = 0;
1786 	ctlbuf = control;
1787 
1788 	while (m && len > 0) {
1789 		socklen_t tocopy;
1790 		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1791 		socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
1792 		socklen_t buflen = m->m_len;
1793 
1794 		while (buflen > 0 && len > 0) {
1795 			/*
1796 			 * SCM_TIMESTAMP hack because  struct timeval has a
1797 			 * different size for 32 bits and 64 bits processes
1798 			 */
1799 			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1800 				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1801 				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1802 				socklen_t tmp_space;
1803 				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1804 
1805 				tmp_cp->cmsg_level = SOL_SOCKET;
1806 				tmp_cp->cmsg_type = SCM_TIMESTAMP;
1807 
1808 				if (proc_is64bit(p)) {
1809 					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1810 
1811 					os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1812 					os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1813 
1814 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1815 					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1816 				} else {
1817 					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1818 
1819 					tv32->tv_sec = (user32_time_t)tv->tv_sec;
1820 					tv32->tv_usec = tv->tv_usec;
1821 
1822 					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1823 					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1824 				}
1825 				if (len >= tmp_space) {
1826 					tocopy = tmp_space;
1827 				} else {
1828 					*flags |= MSG_CTRUNC;
1829 					tocopy = len;
1830 				}
1831 				error = copyout(tmp_buffer, ctlbuf, tocopy);
1832 				if (error) {
1833 					goto out;
1834 				}
1835 			} else {
1836 				/* If socket has flow tracking and socket did not request address, ignore it */
1837 				if (SOFLOW_ENABLED(so) &&
1838 				    ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
1839 				    !(inp->inp_flags & INP_RECVDSTADDR)) ||
1840 				    (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
1841 				    !(inp->inp_flags & IN6P_PKTINFO)))) {
1842 					tocopy = 0;
1843 				} else {
1844 					if (cp_size > buflen) {
1845 						panic("cp_size > buflen, something"
1846 						    "wrong with alignment!");
1847 					}
1848 					if (len >= cp_size) {
1849 						tocopy = cp_size;
1850 					} else {
1851 						*flags |= MSG_CTRUNC;
1852 						tocopy = len;
1853 					}
1854 					error = copyout((caddr_t) cp, ctlbuf, tocopy);
1855 					if (error) {
1856 						goto out;
1857 					}
1858 					if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
1859 					    (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
1860 						seen_pktinfo = true;
1861 					}
1862 				}
1863 			}
1864 
1865 			ctlbuf += tocopy;
1866 			len -= tocopy;
1867 
1868 			buflen -= cp_size;
1869 			cp = (struct cmsghdr *)(void *)
1870 			    ((unsigned char *) cp + cp_size);
1871 			cp_size = CMSG_ALIGN(cp->cmsg_len);
1872 		}
1873 
1874 		m = m->m_next;
1875 	}
1876 	*controllen = (socklen_t)(ctlbuf - control);
1877 out:
1878 	if (want_pktinfo && !seen_pktinfo) {
1879 		missingpktinfo += 1;
1880 #if (DEBUG || DEVELOPMENT)
1881 		char pname[MAXCOMLEN];
1882 		char local[MAX_IPv6_STR_LEN + 6];
1883 		char remote[MAX_IPv6_STR_LEN + 6];
1884 
1885 		proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
1886 		if (inp->inp_vflag & INP_IPV6) {
1887 			inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
1888 			inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
1889 		} else {
1890 			inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
1891 			inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
1892 		}
1893 
1894 		os_log(OS_LOG_DEFAULT,
1895 		    "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
1896 		    local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
1897 		    pname, so->last_pid, error);
1898 #endif /* (DEBUG || DEVELOPMENT) */
1899 	}
1900 	return error;
1901 }
1902 
1903 /*
1904  * Returns:	0			Success
1905  *		ENOTSOCK
1906  *		EINVAL
1907  *		EBADF
1908  *		EACCES			Mandatory Access Control failure
1909  *	copyout:EFAULT
1910  *	fp_lookup:EBADF
1911  *	<pru_soreceive>:ENOBUFS
1912  *	<pru_soreceive>:ENOTCONN
1913  *	<pru_soreceive>:EWOULDBLOCK
1914  *	<pru_soreceive>:EFAULT
1915  *	<pru_soreceive>:EINTR
1916  *	<pru_soreceive>:EBADF
1917  *	<pru_soreceive>:EINVAL
1918  *	<pru_soreceive>:EMSGSIZE
1919  *	<pru_soreceive>:???
1920  *
1921  * Notes:	Additional return values from calls through <pru_soreceive>
1922  *		depend on protocols other than TCP or AF_UNIX, which are
1923  *		documented above.
1924  */
1925 static int
recvit(struct proc * p,int s,struct user_msghdr * mp,uio_t uiop,user_addr_t namelenp,int32_t * retval)1926 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1927     user_addr_t namelenp, int32_t *retval)
1928 {
1929 	ssize_t len;
1930 	int error;
1931 	struct mbuf *control = 0;
1932 	struct socket *so;
1933 	struct sockaddr *fromsa = 0;
1934 	struct fileproc *fp;
1935 
1936 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1937 	if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
1938 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1939 		return error;
1940 	}
1941 	so = (struct socket *)fp_get_data(fp);
1942 
1943 #if CONFIG_MACF_SOCKET_SUBSET
1944 	/*
1945 	 * We check the state without holding the socket lock;
1946 	 * if a race condition occurs, it would simply result
1947 	 * in an extra call to the MAC check function.
1948 	 */
1949 	if (!(so->so_state & SS_DEFUNCT) &&
1950 	    !(so->so_state & SS_ISCONNECTED) &&
1951 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1952 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1953 		goto out1;
1954 	}
1955 #endif /* MAC_SOCKET_SUBSET */
1956 	if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
1957 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1958 		error = EINVAL;
1959 		goto out1;
1960 	}
1961 
1962 	len = uio_resid(uiop);
1963 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1964 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1965 	    &mp->msg_flags);
1966 	if (fromsa) {
1967 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1968 		    fromsa);
1969 	}
1970 	if (error) {
1971 		if (uio_resid(uiop) != len && (error == ERESTART ||
1972 		    error == EINTR || error == EWOULDBLOCK)) {
1973 			error = 0;
1974 		}
1975 	}
1976 	if (error) {
1977 		goto out;
1978 	}
1979 
1980 	*retval = (int32_t)(len - uio_resid(uiop));
1981 
1982 	if (mp->msg_name) {
1983 		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1984 		if (error) {
1985 			goto out;
1986 		}
1987 		/* return the actual, untruncated address length */
1988 		if (namelenp &&
1989 		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1990 		    sizeof(int)))) {
1991 			goto out;
1992 		}
1993 	}
1994 
1995 	if (mp->msg_control) {
1996 		error = copyout_control(p, control, mp->msg_control,
1997 		    &mp->msg_controllen, &mp->msg_flags, so);
1998 	}
1999 out:
2000 	free_sockaddr(fromsa);
2001 	if (control) {
2002 		m_freem(control);
2003 	}
2004 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2005 out1:
2006 	fp_drop(p, s, fp, 0);
2007 	return error;
2008 }
2009 
2010 /*
2011  * Returns:	0			Success
2012  *		ENOMEM
2013  *	copyin:EFAULT
2014  *	recvit:???
2015  *	read:???			[4056224: applicable for pipes]
2016  *
2017  * Notes:	The read entry point is only called as part of support for
2018  *		binary backward compatability; new code should use read
2019  *		instead of recv or recvfrom when attempting to read data
2020  *		from pipes.
2021  *
2022  *		For full documentation of the return codes from recvit, see
2023  *		the block header for the recvit function.
2024  */
2025 int
recvfrom(struct proc * p,struct recvfrom_args * uap,int32_t * retval)2026 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2027 {
2028 	__pthread_testcancel(1);
2029 	return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2030 	           retval);
2031 }
2032 
2033 int
recvfrom_nocancel(struct proc * p,struct recvfrom_nocancel_args * uap,int32_t * retval)2034 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2035     int32_t *retval)
2036 {
2037 	struct user_msghdr msg;
2038 	int error;
2039 	uio_t auio = NULL;
2040 
2041 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2042 	AUDIT_ARG(fd, uap->s);
2043 
2044 	if (uap->fromlenaddr) {
2045 		error = copyin(uap->fromlenaddr,
2046 		    (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2047 		if (error) {
2048 			return error;
2049 		}
2050 	} else {
2051 		msg.msg_namelen = 0;
2052 	}
2053 	msg.msg_name = uap->from;
2054 	auio = uio_create(1, 0,
2055 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2056 	    UIO_READ);
2057 	if (auio == NULL) {
2058 		return ENOMEM;
2059 	}
2060 
2061 	uio_addiov(auio, uap->buf, uap->len);
2062 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
2063 	msg.msg_iov = 0;
2064 	msg.msg_iovlen = 0;
2065 	msg.msg_control = 0;
2066 	msg.msg_controllen = 0;
2067 	msg.msg_flags = uap->flags;
2068 	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2069 	if (auio != NULL) {
2070 		uio_free(auio);
2071 	}
2072 
2073 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2074 
2075 	return error;
2076 }
2077 
2078 /*
2079  * Returns:	0			Success
2080  *		EMSGSIZE
2081  *		ENOMEM
2082  *	copyin:EFAULT
2083  *	copyout:EFAULT
2084  *	recvit:???
2085  *
2086  * Notes:	For full documentation of the return codes from recvit, see
2087  *		the block header for the recvit function.
2088  */
2089 int
recvmsg(struct proc * p,struct recvmsg_args * uap,int32_t * retval)2090 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2091 {
2092 	__pthread_testcancel(1);
2093 	return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2094 	           retval);
2095 }
2096 
2097 int
recvmsg_nocancel(struct proc * p,struct recvmsg_nocancel_args * uap,int32_t * retval)2098 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2099     int32_t *retval)
2100 {
2101 	struct user32_msghdr msg32;
2102 	struct user64_msghdr msg64;
2103 	struct user_msghdr user_msg;
2104 	caddr_t msghdrp;
2105 	int     size_of_msghdr;
2106 	user_addr_t uiov;
2107 	int error;
2108 	uio_t auio = NULL;
2109 	struct user_iovec *iovp;
2110 
2111 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2112 
2113 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2114 	AUDIT_ARG(fd, uap->s);
2115 	if (is_p_64bit_process) {
2116 		msghdrp = (caddr_t)&msg64;
2117 		size_of_msghdr = sizeof(msg64);
2118 	} else {
2119 		msghdrp = (caddr_t)&msg32;
2120 		size_of_msghdr = sizeof(msg32);
2121 	}
2122 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
2123 	if (error) {
2124 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2125 		return error;
2126 	}
2127 
2128 	/* only need to copy if user process is not 64-bit */
2129 	if (is_p_64bit_process) {
2130 		user_msg.msg_flags = msg64.msg_flags;
2131 		user_msg.msg_controllen = msg64.msg_controllen;
2132 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
2133 		user_msg.msg_iovlen = msg64.msg_iovlen;
2134 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2135 		user_msg.msg_namelen = msg64.msg_namelen;
2136 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
2137 	} else {
2138 		user_msg.msg_flags = msg32.msg_flags;
2139 		user_msg.msg_controllen = msg32.msg_controllen;
2140 		user_msg.msg_control = msg32.msg_control;
2141 		user_msg.msg_iovlen = msg32.msg_iovlen;
2142 		user_msg.msg_iov = msg32.msg_iov;
2143 		user_msg.msg_namelen = msg32.msg_namelen;
2144 		user_msg.msg_name = msg32.msg_name;
2145 	}
2146 
2147 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2148 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2149 		    0, 0, 0, 0);
2150 		return EMSGSIZE;
2151 	}
2152 
2153 	user_msg.msg_flags = uap->flags;
2154 
2155 	/* allocate a uio large enough to hold the number of iovecs passed */
2156 	auio = uio_create(user_msg.msg_iovlen, 0,
2157 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2158 	    UIO_READ);
2159 	if (auio == NULL) {
2160 		error = ENOMEM;
2161 		goto done;
2162 	}
2163 
2164 	/*
2165 	 * get location of iovecs within the uio.  then copyin the iovecs from
2166 	 * user space.
2167 	 */
2168 	iovp = uio_iovsaddr(auio);
2169 	if (iovp == NULL) {
2170 		error = ENOMEM;
2171 		goto done;
2172 	}
2173 	uiov = user_msg.msg_iov;
2174 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2175 	error = copyin_user_iovec_array(uiov,
2176 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2177 	    user_msg.msg_iovlen, iovp);
2178 	if (error) {
2179 		goto done;
2180 	}
2181 
2182 	/* finish setup of uio_t */
2183 	error = uio_calculateresid(auio);
2184 	if (error) {
2185 		goto done;
2186 	}
2187 
2188 	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2189 	if (!error) {
2190 		user_msg.msg_iov = uiov;
2191 		if (is_p_64bit_process) {
2192 			msg64.msg_flags = user_msg.msg_flags;
2193 			msg64.msg_controllen = user_msg.msg_controllen;
2194 			msg64.msg_control = user_msg.msg_control;
2195 			msg64.msg_iovlen = user_msg.msg_iovlen;
2196 			msg64.msg_iov = user_msg.msg_iov;
2197 			msg64.msg_namelen = user_msg.msg_namelen;
2198 			msg64.msg_name = user_msg.msg_name;
2199 		} else {
2200 			msg32.msg_flags = user_msg.msg_flags;
2201 			msg32.msg_controllen = user_msg.msg_controllen;
2202 			msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2203 			msg32.msg_iovlen = user_msg.msg_iovlen;
2204 			msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2205 			msg32.msg_namelen = user_msg.msg_namelen;
2206 			msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2207 		}
2208 		error = copyout(msghdrp, uap->msg, size_of_msghdr);
2209 	}
2210 done:
2211 	if (auio != NULL) {
2212 		uio_free(auio);
2213 	}
2214 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2215 	return error;
2216 }
2217 
2218 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2219 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2220 {
2221 	int error = EOPNOTSUPP;
2222 	struct user_msghdr_x *user_msg_x = NULL;
2223 	struct recv_msg_elem *recv_msg_array = NULL;
2224 	struct socket *so;
2225 	user_ssize_t len_before = 0, len_after;
2226 	int need_drop = 0;
2227 	size_t size_of_msghdr;
2228 	void *umsgp = NULL;
2229 	u_int i;
2230 	u_int uiocnt;
2231 
2232 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2233 
2234 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2235 
2236 	size_of_msghdr = is_p_64bit_process ?
2237 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2238 
2239 	error = file_socket(uap->s, &so);
2240 	if (error) {
2241 		goto out;
2242 	}
2243 	need_drop = 1;
2244 	if (so == NULL) {
2245 		error = EBADF;
2246 		goto out;
2247 	}
2248 	/*
2249 	 * Support only a subset of message flags
2250 	 */
2251 	if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |  MSG_NBIO)) {
2252 		return EOPNOTSUPP;
2253 	}
2254 	/*
2255 	 * Input parameter range check
2256 	 */
2257 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2258 		error = EINVAL;
2259 		goto out;
2260 	}
2261 	if (uap->cnt > somaxrecvmsgx) {
2262 		uap->cnt = somaxrecvmsgx;
2263 	}
2264 
2265 	user_msg_x = kalloc_data(uap->cnt * sizeof(struct user_msghdr_x),
2266 	    Z_WAITOK | Z_ZERO);
2267 	if (user_msg_x == NULL) {
2268 		DBG_PRINTF("%s user_msg_x alloc failed\n", __func__);
2269 		error = ENOMEM;
2270 		goto out;
2271 	}
2272 	recv_msg_array = alloc_recv_msg_array(uap->cnt);
2273 	if (recv_msg_array == NULL) {
2274 		DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2275 		error = ENOMEM;
2276 		goto out;
2277 	}
2278 
2279 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2280 	if (umsgp == NULL) {
2281 		DBG_PRINTF("%s umsgp alloc failed\n", __func__);
2282 		error = ENOMEM;
2283 		goto out;
2284 	}
2285 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2286 	if (error) {
2287 		DBG_PRINTF("%s copyin() failed\n", __func__);
2288 		goto out;
2289 	}
2290 	error = internalize_recv_msghdr_array(umsgp,
2291 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2292 	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2293 	if (error) {
2294 		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2295 		goto out;
2296 	}
2297 	/*
2298 	 * Make sure the size of each message iovec and
2299 	 * the aggregate size of all the iovec is valid
2300 	 */
2301 	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2302 		error = EINVAL;
2303 		goto out;
2304 	}
2305 	/*
2306 	 * Sanity check on passed arguments
2307 	 */
2308 	for (i = 0; i < uap->cnt; i++) {
2309 		struct user_msghdr_x *mp = user_msg_x + i;
2310 
2311 		if (mp->msg_flags != 0) {
2312 			error = EINVAL;
2313 			goto out;
2314 		}
2315 	}
2316 #if CONFIG_MACF_SOCKET_SUBSET
2317 	/*
2318 	 * We check the state without holding the socket lock;
2319 	 * if a race condition occurs, it would simply result
2320 	 * in an extra call to the MAC check function.
2321 	 */
2322 	if (!(so->so_state & SS_DEFUNCT) &&
2323 	    !(so->so_state & SS_ISCONNECTED) &&
2324 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2325 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2326 		goto out;
2327 	}
2328 #endif /* MAC_SOCKET_SUBSET */
2329 
2330 	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2331 
2332 	if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2333 	    pru_soreceive_list_notsupp &&
2334 	    somaxrecvmsgx == 0) {
2335 		error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2336 		    recv_msg_array, uap->cnt, &uap->flags);
2337 	} else {
2338 		int flags = uap->flags;
2339 
2340 		for (i = 0; i < uap->cnt; i++) {
2341 			struct recv_msg_elem *recv_msg_elem;
2342 			uio_t auio;
2343 			struct sockaddr **psa;
2344 			struct mbuf **controlp;
2345 
2346 			recv_msg_elem = recv_msg_array + i;
2347 			auio = recv_msg_elem->uio;
2348 
2349 			/*
2350 			 * Do not block if we got at least one packet
2351 			 */
2352 			if (i > 0) {
2353 				flags |= MSG_DONTWAIT;
2354 			}
2355 
2356 			psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2357 			    &recv_msg_elem->psa : NULL;
2358 			controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2359 			    &recv_msg_elem->controlp : NULL;
2360 
2361 			error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2362 			    auio, (struct mbuf **)NULL, controlp, &flags);
2363 			if (error) {
2364 				break;
2365 			}
2366 			/*
2367 			 * We have some data
2368 			 */
2369 			recv_msg_elem->which |= SOCK_MSG_DATA;
2370 			/*
2371 			 * Set the messages flags for this packet
2372 			 */
2373 			flags &= ~MSG_DONTWAIT;
2374 			recv_msg_elem->flags = flags;
2375 			/*
2376 			 * Stop on partial copy
2377 			 */
2378 			if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2379 				break;
2380 			}
2381 		}
2382 	}
2383 
2384 	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2385 
2386 	if (error) {
2387 		if (len_after != len_before && (error == ERESTART ||
2388 		    error == EINTR || error == EWOULDBLOCK)) {
2389 			error = 0;
2390 		} else {
2391 			goto out;
2392 		}
2393 	}
2394 
2395 	uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2396 	    uap->cnt, user_msg_x, recv_msg_array, &error);
2397 	if (error != 0) {
2398 		goto out;
2399 	}
2400 
2401 	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2402 	if (error) {
2403 		DBG_PRINTF("%s copyout() failed\n", __func__);
2404 		goto out;
2405 	}
2406 	*retval = (int)(uiocnt);
2407 
2408 out:
2409 	if (need_drop) {
2410 		file_drop(uap->s);
2411 	}
2412 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
2413 	free_recv_msg_array(recv_msg_array, uap->cnt);
2414 	kfree_data(user_msg_x, uap->cnt * sizeof(struct user_msghdr_x));
2415 
2416 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2417 
2418 	return error;
2419 }
2420 
2421 /*
2422  * Returns:	0			Success
2423  *		EBADF
2424  *	file_socket:ENOTSOCK
2425  *	file_socket:EBADF
2426  *	soshutdown:EINVAL
2427  *	soshutdown:ENOTCONN
2428  *	soshutdown:EADDRNOTAVAIL[TCP]
2429  *	soshutdown:ENOBUFS[TCP]
2430  *	soshutdown:EMSGSIZE[TCP]
2431  *	soshutdown:EHOSTUNREACH[TCP]
2432  *	soshutdown:ENETUNREACH[TCP]
2433  *	soshutdown:ENETDOWN[TCP]
2434  *	soshutdown:ENOMEM[TCP]
2435  *	soshutdown:EACCES[TCP]
2436  *	soshutdown:EMSGSIZE[TCP]
2437  *	soshutdown:ENOBUFS[TCP]
2438  *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
2439  *	soshutdown:???			[other protocol families]
2440  */
2441 /* ARGSUSED */
2442 int
shutdown(__unused struct proc * p,struct shutdown_args * uap,__unused int32_t * retval)2443 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2444     __unused int32_t *retval)
2445 {
2446 	struct socket *so;
2447 	int error;
2448 
2449 	AUDIT_ARG(fd, uap->s);
2450 	error = file_socket(uap->s, &so);
2451 	if (error) {
2452 		return error;
2453 	}
2454 	if (so == NULL) {
2455 		error = EBADF;
2456 		goto out;
2457 	}
2458 	error =  soshutdown((struct socket *)so, uap->how);
2459 out:
2460 	file_drop(uap->s);
2461 	return error;
2462 }
2463 
2464 /*
2465  * Returns:	0			Success
2466  *		EFAULT
2467  *		EINVAL
2468  *		EACCES			Mandatory Access Control failure
2469  *	file_socket:ENOTSOCK
2470  *	file_socket:EBADF
2471  *	sosetopt:EINVAL
2472  *	sosetopt:ENOPROTOOPT
2473  *	sosetopt:ENOBUFS
2474  *	sosetopt:EDOM
2475  *	sosetopt:EFAULT
2476  *	sosetopt:EOPNOTSUPP[AF_UNIX]
2477  *	sosetopt:???
2478  */
2479 /* ARGSUSED */
2480 int
setsockopt(struct proc * p,struct setsockopt_args * uap,__unused int32_t * retval)2481 setsockopt(struct proc *p, struct setsockopt_args *uap,
2482     __unused int32_t *retval)
2483 {
2484 	struct socket *so;
2485 	struct sockopt sopt;
2486 	int error;
2487 
2488 	AUDIT_ARG(fd, uap->s);
2489 	if (uap->val == 0 && uap->valsize != 0) {
2490 		return EFAULT;
2491 	}
2492 	/* No bounds checking on size (it's unsigned) */
2493 
2494 	error = file_socket(uap->s, &so);
2495 	if (error) {
2496 		return error;
2497 	}
2498 
2499 	sopt.sopt_dir = SOPT_SET;
2500 	sopt.sopt_level = uap->level;
2501 	sopt.sopt_name = uap->name;
2502 	sopt.sopt_val = uap->val;
2503 	sopt.sopt_valsize = uap->valsize;
2504 	sopt.sopt_p = p;
2505 
2506 	if (so == NULL) {
2507 		error = EINVAL;
2508 		goto out;
2509 	}
2510 #if CONFIG_MACF_SOCKET_SUBSET
2511 	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2512 	    &sopt)) != 0) {
2513 		goto out;
2514 	}
2515 #endif /* MAC_SOCKET_SUBSET */
2516 	error = sosetoptlock(so, &sopt, 1);     /* will lock socket */
2517 out:
2518 	file_drop(uap->s);
2519 	return error;
2520 }
2521 
2522 
2523 
2524 /*
2525  * Returns:	0			Success
2526  *		EINVAL
2527  *		EBADF
2528  *		EACCES			Mandatory Access Control failure
2529  *	copyin:EFAULT
2530  *	copyout:EFAULT
2531  *	file_socket:ENOTSOCK
2532  *	file_socket:EBADF
2533  *	sogetopt:???
2534  */
2535 int
getsockopt(struct proc * p,struct getsockopt_args * uap,__unused int32_t * retval)2536 getsockopt(struct proc *p, struct getsockopt_args  *uap,
2537     __unused int32_t *retval)
2538 {
2539 	int             error;
2540 	socklen_t       valsize;
2541 	struct sockopt  sopt;
2542 	struct socket *so;
2543 
2544 	error = file_socket(uap->s, &so);
2545 	if (error) {
2546 		return error;
2547 	}
2548 	if (uap->val) {
2549 		error = copyin(uap->avalsize, (caddr_t)&valsize,
2550 		    sizeof(valsize));
2551 		if (error) {
2552 			goto out;
2553 		}
2554 		/* No bounds checking on size (it's unsigned) */
2555 	} else {
2556 		valsize = 0;
2557 	}
2558 	sopt.sopt_dir = SOPT_GET;
2559 	sopt.sopt_level = uap->level;
2560 	sopt.sopt_name = uap->name;
2561 	sopt.sopt_val = uap->val;
2562 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2563 	sopt.sopt_p = p;
2564 
2565 	if (so == NULL) {
2566 		error = EBADF;
2567 		goto out;
2568 	}
2569 #if CONFIG_MACF_SOCKET_SUBSET
2570 	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2571 	    &sopt)) != 0) {
2572 		goto out;
2573 	}
2574 #endif /* MAC_SOCKET_SUBSET */
2575 	error = sogetoptlock((struct socket *)so, &sopt, 1);    /* will lock */
2576 	if (error == 0) {
2577 		valsize = (socklen_t)sopt.sopt_valsize;
2578 		error = copyout((caddr_t)&valsize, uap->avalsize,
2579 		    sizeof(valsize));
2580 	}
2581 out:
2582 	file_drop(uap->s);
2583 	return error;
2584 }
2585 
2586 
2587 /*
2588  * Get socket name.
2589  *
2590  * Returns:	0			Success
2591  *		EBADF
2592  *	file_socket:ENOTSOCK
2593  *	file_socket:EBADF
2594  *	copyin:EFAULT
2595  *	copyout:EFAULT
2596  *	<pru_sockaddr>:ENOBUFS[TCP]
2597  *	<pru_sockaddr>:ECONNRESET[TCP]
2598  *	<pru_sockaddr>:EINVAL[AF_UNIX]
2599  *	<sf_getsockname>:???
2600  */
2601 /* ARGSUSED */
2602 int
getsockname(__unused struct proc * p,struct getsockname_args * uap,__unused int32_t * retval)2603 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2604     __unused int32_t *retval)
2605 {
2606 	struct socket *so;
2607 	struct sockaddr *sa;
2608 	socklen_t len;
2609 	socklen_t sa_len;
2610 	int error;
2611 
2612 	error = file_socket(uap->fdes, &so);
2613 	if (error) {
2614 		return error;
2615 	}
2616 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2617 	if (error) {
2618 		goto out;
2619 	}
2620 	if (so == NULL) {
2621 		error = EBADF;
2622 		goto out;
2623 	}
2624 	sa = 0;
2625 	socket_lock(so, 1);
2626 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2627 	if (error == 0) {
2628 		error = sflt_getsockname(so, &sa);
2629 		if (error == EJUSTRETURN) {
2630 			error = 0;
2631 		}
2632 	}
2633 	socket_unlock(so, 1);
2634 	if (error) {
2635 		goto bad;
2636 	}
2637 	if (sa == 0) {
2638 		len = 0;
2639 		goto gotnothing;
2640 	}
2641 
2642 	sa_len = sa->sa_len;
2643 	len = MIN(len, sa_len);
2644 	error = copyout((caddr_t)sa, uap->asa, len);
2645 	if (error) {
2646 		goto bad;
2647 	}
2648 	/* return the actual, untruncated address length */
2649 	len = sa_len;
2650 gotnothing:
2651 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2652 bad:
2653 	free_sockaddr(sa);
2654 out:
2655 	file_drop(uap->fdes);
2656 	return error;
2657 }
2658 
2659 /*
2660  * Get name of peer for connected socket.
2661  *
2662  * Returns:	0			Success
2663  *		EBADF
2664  *		EINVAL
2665  *		ENOTCONN
2666  *	file_socket:ENOTSOCK
2667  *	file_socket:EBADF
2668  *	copyin:EFAULT
2669  *	copyout:EFAULT
2670  *	<pru_peeraddr>:???
2671  *	<sf_getpeername>:???
2672  */
2673 /* ARGSUSED */
2674 int
getpeername(__unused struct proc * p,struct getpeername_args * uap,__unused int32_t * retval)2675 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2676     __unused int32_t *retval)
2677 {
2678 	struct socket *so;
2679 	struct sockaddr *sa;
2680 	socklen_t len;
2681 	socklen_t sa_len;
2682 	int error;
2683 
2684 	error = file_socket(uap->fdes, &so);
2685 	if (error) {
2686 		return error;
2687 	}
2688 	if (so == NULL) {
2689 		error = EBADF;
2690 		goto out;
2691 	}
2692 
2693 	socket_lock(so, 1);
2694 
2695 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2696 	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2697 		/* the socket has been shutdown, no more getpeername's */
2698 		socket_unlock(so, 1);
2699 		error = EINVAL;
2700 		goto out;
2701 	}
2702 
2703 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2704 		socket_unlock(so, 1);
2705 		error = ENOTCONN;
2706 		goto out;
2707 	}
2708 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2709 	if (error) {
2710 		socket_unlock(so, 1);
2711 		goto out;
2712 	}
2713 	sa = 0;
2714 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2715 	if (error == 0) {
2716 		error = sflt_getpeername(so, &sa);
2717 		if (error == EJUSTRETURN) {
2718 			error = 0;
2719 		}
2720 	}
2721 	socket_unlock(so, 1);
2722 	if (error) {
2723 		goto bad;
2724 	}
2725 	if (sa == 0) {
2726 		len = 0;
2727 		goto gotnothing;
2728 	}
2729 	sa_len = sa->sa_len;
2730 	len = MIN(len, sa_len);
2731 	error = copyout(sa, uap->asa, len);
2732 	if (error) {
2733 		goto bad;
2734 	}
2735 	/* return the actual, untruncated address length */
2736 	len = sa_len;
2737 gotnothing:
2738 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2739 bad:
2740 	free_sockaddr(sa);
2741 out:
2742 	file_drop(uap->fdes);
2743 	return error;
2744 }
2745 
2746 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)2747 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
2748 {
2749 	struct sockaddr *sa;
2750 	struct mbuf *m;
2751 	int error;
2752 	socklen_t alloc_buflen = buflen;
2753 
2754 	if (buflen > INT_MAX / 2) {
2755 		return EINVAL;
2756 	}
2757 	if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
2758 	    buflen < offsetof(struct sockaddr, sa_data[0]))) {
2759 		return EINVAL;
2760 	}
2761 	if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
2762 		return EINVAL;
2763 	}
2764 
2765 #ifdef __LP64__
2766 	/*
2767 	 * The fd's in the buffer must expand to be pointers, thus we need twice
2768 	 * as much space
2769 	 */
2770 	if (type == MT_CONTROL) {
2771 		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2772 		    sizeof(struct cmsghdr);
2773 	}
2774 #endif
2775 	if (alloc_buflen > MLEN) {
2776 		if (type == MT_SONAME && alloc_buflen <= 112) {
2777 			alloc_buflen = MLEN;    /* unix domain compat. hack */
2778 		} else if (alloc_buflen > MCLBYTES) {
2779 			return EINVAL;
2780 		}
2781 	}
2782 	m = m_get(M_WAIT, type);
2783 	if (m == NULL) {
2784 		return ENOBUFS;
2785 	}
2786 	if (alloc_buflen > MLEN) {
2787 		MCLGET(m, M_WAIT);
2788 		if ((m->m_flags & M_EXT) == 0) {
2789 			m_free(m);
2790 			return ENOBUFS;
2791 		}
2792 	}
2793 	/*
2794 	 * K64: We still copyin the original buflen because it gets expanded
2795 	 * later and we lie about the size of the mbuf because it only affects
2796 	 * unp_* functions
2797 	 */
2798 	m->m_len = buflen;
2799 	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2800 	if (error) {
2801 		(void) m_free(m);
2802 	} else {
2803 		*mp = m;
2804 		if (type == MT_SONAME) {
2805 			sa = mtod(m, struct sockaddr *);
2806 			VERIFY(buflen <= SOCK_MAXADDRLEN);
2807 			sa->sa_len = (__uint8_t)buflen;
2808 		}
2809 	}
2810 	return error;
2811 }
2812 
2813 /*
2814  * Given a user_addr_t of length len, allocate and fill out a *sa.
2815  *
2816  * Returns:	0			Success
2817  *		ENAMETOOLONG		Filename too long
2818  *		EINVAL			Invalid argument
2819  *		ENOMEM			Not enough space
2820  *		copyin:EFAULT		Bad address
2821  */
2822 static int
getsockaddr(struct socket * so,struct sockaddr ** namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2823 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2824     size_t len, boolean_t translate_unspec)
2825 {
2826 	struct sockaddr *sa;
2827 	int error;
2828 
2829 	if (len > SOCK_MAXADDRLEN) {
2830 		return ENAMETOOLONG;
2831 	}
2832 
2833 	if (len < offsetof(struct sockaddr, sa_data[0])) {
2834 		return EINVAL;
2835 	}
2836 
2837 	sa = (struct sockaddr *)alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
2838 
2839 	error = copyin(uaddr, (caddr_t)sa, len);
2840 	if (error) {
2841 		free_sockaddr(sa);
2842 	} else {
2843 		/*
2844 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2845 		 * legacy applications that use AF_UNSPEC (0).  On all other
2846 		 * sockets we leave it unchanged and let the lower layer
2847 		 * handle it.
2848 		 */
2849 		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2850 		    SOCK_CHECK_DOM(so, PF_INET) &&
2851 		    len == sizeof(struct sockaddr_in)) {
2852 			sa->sa_family = AF_INET;
2853 		}
2854 		VERIFY(len <= SOCK_MAXADDRLEN);
2855 		sa->sa_len = (__uint8_t)len;
2856 		*namp = sa;
2857 	}
2858 	return error;
2859 }
2860 
2861 static int
getsockaddr_s(struct socket * so,struct sockaddr_storage * ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)2862 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2863     user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2864 {
2865 	int error;
2866 
2867 	if (ss == NULL || uaddr == USER_ADDR_NULL ||
2868 	    len < offsetof(struct sockaddr, sa_data[0])) {
2869 		return EINVAL;
2870 	}
2871 
2872 	/*
2873 	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2874 	 * so the check here is inclusive.
2875 	 */
2876 	if (len > sizeof(*ss)) {
2877 		return ENAMETOOLONG;
2878 	}
2879 
2880 	bzero(ss, sizeof(*ss));
2881 	error = copyin(uaddr, (caddr_t)ss, len);
2882 	if (error == 0) {
2883 		/*
2884 		 * Force sa_family to AF_INET on AF_INET sockets to handle
2885 		 * legacy applications that use AF_UNSPEC (0).  On all other
2886 		 * sockets we leave it unchanged and let the lower layer
2887 		 * handle it.
2888 		 */
2889 		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2890 		    SOCK_CHECK_DOM(so, PF_INET) &&
2891 		    len == sizeof(struct sockaddr_in)) {
2892 			ss->ss_family = AF_INET;
2893 		}
2894 
2895 		ss->ss_len = (__uint8_t)len;
2896 	}
2897 	return error;
2898 }
2899 
2900 int
internalize_user_msghdr_array(const void * src,int spacetype,int direction,u_int count,struct user_msghdr_x * dst,struct uio ** uiop)2901 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2902     u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2903 {
2904 	int error = 0;
2905 	u_int i;
2906 	u_int namecnt = 0;
2907 	u_int ctlcnt = 0;
2908 
2909 	for (i = 0; i < count; i++) {
2910 		uio_t auio;
2911 		struct user_iovec *iovp;
2912 		struct user_msghdr_x *user_msg = dst + i;
2913 
2914 		if (spacetype == UIO_USERSPACE64) {
2915 			const struct user64_msghdr_x *msghdr64;
2916 
2917 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2918 
2919 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2920 			user_msg->msg_namelen = msghdr64->msg_namelen;
2921 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2922 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
2923 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2924 			user_msg->msg_controllen = msghdr64->msg_controllen;
2925 			user_msg->msg_flags = msghdr64->msg_flags;
2926 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2927 		} else {
2928 			const struct user32_msghdr_x *msghdr32;
2929 
2930 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2931 
2932 			user_msg->msg_name = msghdr32->msg_name;
2933 			user_msg->msg_namelen = msghdr32->msg_namelen;
2934 			user_msg->msg_iov = msghdr32->msg_iov;
2935 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
2936 			user_msg->msg_control = msghdr32->msg_control;
2937 			user_msg->msg_controllen = msghdr32->msg_controllen;
2938 			user_msg->msg_flags = msghdr32->msg_flags;
2939 			user_msg->msg_datalen = msghdr32->msg_datalen;
2940 		}
2941 
2942 		if (user_msg->msg_iovlen <= 0 ||
2943 		    user_msg->msg_iovlen > UIO_MAXIOV) {
2944 			error = EMSGSIZE;
2945 			goto done;
2946 		}
2947 		auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2948 		    direction);
2949 		if (auio == NULL) {
2950 			error = ENOMEM;
2951 			goto done;
2952 		}
2953 		uiop[i] = auio;
2954 
2955 		iovp = uio_iovsaddr(auio);
2956 		if (iovp == NULL) {
2957 			error = ENOMEM;
2958 			goto done;
2959 		}
2960 		error = copyin_user_iovec_array(user_msg->msg_iov,
2961 		    spacetype, user_msg->msg_iovlen, iovp);
2962 		if (error) {
2963 			goto done;
2964 		}
2965 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2966 
2967 		error = uio_calculateresid(auio);
2968 		if (error) {
2969 			goto done;
2970 		}
2971 		user_msg->msg_datalen = uio_resid(auio);
2972 
2973 		if (user_msg->msg_name && user_msg->msg_namelen) {
2974 			namecnt++;
2975 		}
2976 		if (user_msg->msg_control && user_msg->msg_controllen) {
2977 			ctlcnt++;
2978 		}
2979 	}
2980 done:
2981 
2982 	return error;
2983 }
2984 
2985 int
internalize_recv_msghdr_array(const void * src,int spacetype,int direction,u_int count,struct user_msghdr_x * dst,struct recv_msg_elem * recv_msg_array)2986 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2987     u_int count, struct user_msghdr_x *dst,
2988     struct recv_msg_elem *recv_msg_array)
2989 {
2990 	int error = 0;
2991 	u_int i;
2992 
2993 	for (i = 0; i < count; i++) {
2994 		struct user_iovec *iovp;
2995 		struct user_msghdr_x *user_msg = dst + i;
2996 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2997 
2998 		if (spacetype == UIO_USERSPACE64) {
2999 			const struct user64_msghdr_x *msghdr64;
3000 
3001 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3002 
3003 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3004 			user_msg->msg_namelen = msghdr64->msg_namelen;
3005 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3006 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3007 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3008 			user_msg->msg_controllen = msghdr64->msg_controllen;
3009 			user_msg->msg_flags = msghdr64->msg_flags;
3010 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3011 		} else {
3012 			const struct user32_msghdr_x *msghdr32;
3013 
3014 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3015 
3016 			user_msg->msg_name = msghdr32->msg_name;
3017 			user_msg->msg_namelen = msghdr32->msg_namelen;
3018 			user_msg->msg_iov = msghdr32->msg_iov;
3019 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3020 			user_msg->msg_control = msghdr32->msg_control;
3021 			user_msg->msg_controllen = msghdr32->msg_controllen;
3022 			user_msg->msg_flags = msghdr32->msg_flags;
3023 			user_msg->msg_datalen = msghdr32->msg_datalen;
3024 		}
3025 
3026 		if (user_msg->msg_iovlen <= 0 ||
3027 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3028 			error = EMSGSIZE;
3029 			goto done;
3030 		}
3031 		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3032 		    spacetype, direction);
3033 		if (recv_msg_elem->uio == NULL) {
3034 			error = ENOMEM;
3035 			goto done;
3036 		}
3037 
3038 		iovp = uio_iovsaddr(recv_msg_elem->uio);
3039 		if (iovp == NULL) {
3040 			error = ENOMEM;
3041 			goto done;
3042 		}
3043 		error = copyin_user_iovec_array(user_msg->msg_iov,
3044 		    spacetype, user_msg->msg_iovlen, iovp);
3045 		if (error) {
3046 			goto done;
3047 		}
3048 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3049 
3050 		error = uio_calculateresid(recv_msg_elem->uio);
3051 		if (error) {
3052 			goto done;
3053 		}
3054 		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3055 
3056 		if (user_msg->msg_name && user_msg->msg_namelen) {
3057 			recv_msg_elem->which |= SOCK_MSG_SA;
3058 		}
3059 		if (user_msg->msg_control && user_msg->msg_controllen) {
3060 			recv_msg_elem->which |= SOCK_MSG_CONTROL;
3061 		}
3062 	}
3063 done:
3064 
3065 	return error;
3066 }
3067 
3068 u_int
externalize_user_msghdr_array(void * dst,int spacetype,int direction,u_int count,const struct user_msghdr_x * src,struct uio ** uiop)3069 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3070     u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3071 {
3072 #pragma unused(direction)
3073 	u_int i;
3074 	int seenlast = 0;
3075 	u_int retcnt = 0;
3076 
3077 	for (i = 0; i < count; i++) {
3078 		const struct user_msghdr_x *user_msg = src + i;
3079 		uio_t auio = uiop[i];
3080 		user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3081 
3082 		if (user_msg->msg_datalen != 0 && len == 0) {
3083 			seenlast = 1;
3084 		}
3085 
3086 		if (seenlast == 0) {
3087 			retcnt++;
3088 		}
3089 
3090 		if (spacetype == UIO_USERSPACE64) {
3091 			struct user64_msghdr_x *msghdr64;
3092 
3093 			msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3094 
3095 			msghdr64->msg_flags = user_msg->msg_flags;
3096 			msghdr64->msg_datalen = len;
3097 		} else {
3098 			struct user32_msghdr_x *msghdr32;
3099 
3100 			msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3101 
3102 			msghdr32->msg_flags = user_msg->msg_flags;
3103 			msghdr32->msg_datalen = (user32_size_t)len;
3104 		}
3105 	}
3106 	return retcnt;
3107 }
3108 
3109 u_int
externalize_recv_msghdr_array(struct proc * p,struct socket * so,void * dst,u_int count,struct user_msghdr_x * src,struct recv_msg_elem * recv_msg_array,int * ret_error)3110 externalize_recv_msghdr_array(struct proc *p, struct socket *so, void *dst,
3111     u_int count, struct user_msghdr_x *src,
3112     struct recv_msg_elem *recv_msg_array, int *ret_error)
3113 {
3114 	u_int i;
3115 	u_int retcnt = 0;
3116 	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3117 
3118 	*ret_error = 0;
3119 
3120 	for (i = 0; i < count; i++) {
3121 		struct user_msghdr_x *user_msg = src + i;
3122 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3123 		user_ssize_t len = 0;
3124 		int error;
3125 
3126 		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3127 
3128 		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3129 			retcnt++;
3130 
3131 
3132 			if (recv_msg_elem->which & SOCK_MSG_SA) {
3133 				error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3134 				    &user_msg->msg_namelen);
3135 				if (error != 0) {
3136 					*ret_error = error;
3137 					return 0;
3138 				}
3139 			}
3140 			if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3141 				error = copyout_control(p, recv_msg_elem->controlp,
3142 				    user_msg->msg_control, &user_msg->msg_controllen,
3143 				    &recv_msg_elem->flags, so);
3144 				if (error != 0) {
3145 					*ret_error = error;
3146 					return 0;
3147 				}
3148 			}
3149 		}
3150 
3151 		if (spacetype == UIO_USERSPACE64) {
3152 			struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3153 
3154 			msghdr64->msg_namelen = user_msg->msg_namelen;
3155 			msghdr64->msg_controllen = user_msg->msg_controllen;
3156 			msghdr64->msg_flags = recv_msg_elem->flags;
3157 			msghdr64->msg_datalen = len;
3158 		} else {
3159 			struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3160 
3161 			msghdr32->msg_namelen = user_msg->msg_namelen;
3162 			msghdr32->msg_controllen = user_msg->msg_controllen;
3163 			msghdr32->msg_flags = recv_msg_elem->flags;
3164 			msghdr32->msg_datalen = (user32_size_t)len;
3165 		}
3166 	}
3167 	return retcnt;
3168 }
3169 
3170 void
free_uio_array(struct uio ** uiop,u_int count)3171 free_uio_array(struct uio **uiop, u_int count)
3172 {
3173 	u_int i;
3174 
3175 	for (i = 0; i < count; i++) {
3176 		if (uiop[i] != NULL) {
3177 			uio_free(uiop[i]);
3178 		}
3179 	}
3180 }
3181 
3182 __private_extern__ user_ssize_t
uio_array_resid(struct uio ** uiop,u_int count)3183 uio_array_resid(struct uio **uiop, u_int count)
3184 {
3185 	user_ssize_t len = 0;
3186 	u_int i;
3187 
3188 	for (i = 0; i < count; i++) {
3189 		struct uio *auio = uiop[i];
3190 
3191 		if (auio != NULL) {
3192 			len += uio_resid(auio);
3193 		}
3194 	}
3195 	return len;
3196 }
3197 
3198 static boolean_t
uio_array_is_valid(struct uio ** uiop,u_int count)3199 uio_array_is_valid(struct uio **uiop, u_int count)
3200 {
3201 	user_ssize_t len = 0;
3202 	u_int i;
3203 
3204 	for (i = 0; i < count; i++) {
3205 		struct uio *auio = uiop[i];
3206 
3207 		if (auio != NULL) {
3208 			user_ssize_t resid = uio_resid(auio);
3209 
3210 			/*
3211 			 * Sanity check on the validity of the iovec:
3212 			 * no point of going over sb_max
3213 			 */
3214 			if (resid < 0 || resid > (user_ssize_t)sb_max) {
3215 				return false;
3216 			}
3217 
3218 			len += resid;
3219 			if (len < 0 || len > (user_ssize_t)sb_max) {
3220 				return false;
3221 			}
3222 		}
3223 	}
3224 	return true;
3225 }
3226 
3227 
3228 struct recv_msg_elem *
alloc_recv_msg_array(u_int count)3229 alloc_recv_msg_array(u_int count)
3230 {
3231 	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3232 }
3233 
3234 void
free_recv_msg_array(struct recv_msg_elem * recv_msg_array,u_int count)3235 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3236 {
3237 	if (recv_msg_array == NULL) {
3238 		return;
3239 	}
3240 	for (uint32_t i = 0; i < count; i++) {
3241 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3242 
3243 		if (recv_msg_elem->uio != NULL) {
3244 			uio_free(recv_msg_elem->uio);
3245 		}
3246 		free_sockaddr(recv_msg_elem->psa);
3247 		if (recv_msg_elem->controlp != NULL) {
3248 			m_freem(recv_msg_elem->controlp);
3249 		}
3250 	}
3251 	kfree_type(struct recv_msg_elem, count, recv_msg_array);
3252 }
3253 
3254 
3255 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * recv_msg_array,u_int count)3256 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3257 {
3258 	user_ssize_t len = 0;
3259 	u_int i;
3260 
3261 	for (i = 0; i < count; i++) {
3262 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3263 
3264 		if (recv_msg_elem->uio != NULL) {
3265 			len += uio_resid(recv_msg_elem->uio);
3266 		}
3267 	}
3268 	return len;
3269 }
3270 
3271 int
recv_msg_array_is_valid(struct recv_msg_elem * recv_msg_array,u_int count)3272 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3273 {
3274 	user_ssize_t len = 0;
3275 	u_int i;
3276 
3277 	for (i = 0; i < count; i++) {
3278 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3279 
3280 		if (recv_msg_elem->uio != NULL) {
3281 			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3282 
3283 			/*
3284 			 * Sanity check on the validity of the iovec:
3285 			 * no point of going over sb_max
3286 			 */
3287 			if (resid < 0 || (u_int32_t)resid > sb_max) {
3288 				return 0;
3289 			}
3290 
3291 			len += resid;
3292 			if (len < 0 || (u_int32_t)len > sb_max) {
3293 				return 0;
3294 			}
3295 		}
3296 	}
3297 	return 1;
3298 }
3299 
3300 #if SENDFILE
3301 
/* Number of iovec slots in the on-stack uio used by sendfile() */
#define SFUIOBUFS 64

/*
 * Number of clusters of a given size needed to hold n bytes
 * (n must be non-zero).
 */
#define HOWMANY_16K(n)  ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n)   ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Most bytes sendfile() moves per pass: SFUIOBUFS pages */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* The same per-pass limit expressed in mbuf clusters of each size */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3314 
3315 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,struct mbuf ** m,boolean_t jumbocl)3316 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3317     struct mbuf **m, boolean_t jumbocl)
3318 {
3319 	unsigned int needed;
3320 
3321 	if (pktlen == 0) {
3322 		panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3323 	}
3324 
3325 	/*
3326 	 * Try to allocate for the whole thing.  Since we want full control
3327 	 * over the buffer size and be able to accept partial result, we can't
3328 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
3329 	 */
3330 	*m = NULL;
3331 	if (pktlen > MBIGCLBYTES && jumbocl) {
3332 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3333 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3334 	}
3335 	if (*m == NULL) {
3336 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3337 		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3338 	}
3339 
3340 	/*
3341 	 * Our previous attempt(s) at allocation had failed; the system
3342 	 * may be short on mbufs, and we want to block until they are
3343 	 * available.  This time, ask just for 1 mbuf and don't return
3344 	 * until we get it.
3345 	 */
3346 	if (*m == NULL) {
3347 		needed = 1;
3348 		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3349 	}
3350 	if (*m == NULL) {
3351 		panic("%s: blocking allocation returned NULL", __func__);
3352 	}
3353 
3354 	*maxchunks = needed;
3355 }
3356 
3357 /*
3358  * sendfile(2).
3359  * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3360  *	 struct sf_hdtr *hdtr, int flags)
3361  *
3362  * Send a file specified by 'fd' and starting at 'offset' to a socket
3363  * specified by 's'. Send only '*nbytes' of the file or until EOF if
3364  * *nbytes == 0. Optionally add a header and/or trailer to the socket
3365  * output. If specified, write the total number of bytes sent into *nbytes.
3366  */
3367 int
sendfile(struct proc * p,struct sendfile_args * uap,__unused int * retval)3368 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3369 {
3370 	struct fileproc *fp;
3371 	struct vnode *vp;
3372 	struct socket *so;
3373 	struct writev_nocancel_args nuap;
3374 	user_ssize_t writev_retval;
3375 	struct user_sf_hdtr user_hdtr;
3376 	struct user32_sf_hdtr user32_hdtr;
3377 	struct user64_sf_hdtr user64_hdtr;
3378 	off_t off, xfsize;
3379 	off_t nbytes = 0, sbytes = 0;
3380 	int error = 0;
3381 	size_t sizeof_hdtr;
3382 	off_t file_size;
3383 	struct vfs_context context = *vfs_context_current();
3384 
3385 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3386 
3387 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3388 	    0, 0, 0, 0);
3389 
3390 	AUDIT_ARG(fd, uap->fd);
3391 	AUDIT_ARG(value32, uap->s);
3392 
3393 	/*
3394 	 * Do argument checking. Must be a regular file in, stream
3395 	 * type and connected socket out, positive offset.
3396 	 */
3397 	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3398 		goto done;
3399 	}
3400 	if ((fp->f_flag & FREAD) == 0) {
3401 		error = EBADF;
3402 		goto done1;
3403 	}
3404 	if (vnode_isreg(vp) == 0) {
3405 		error = ENOTSUP;
3406 		goto done1;
3407 	}
3408 	error = file_socket(uap->s, &so);
3409 	if (error) {
3410 		goto done1;
3411 	}
3412 	if (so == NULL) {
3413 		error = EBADF;
3414 		goto done2;
3415 	}
3416 	if (so->so_type != SOCK_STREAM) {
3417 		error = EINVAL;
3418 		goto done2;
3419 	}
3420 	if ((so->so_state & SS_ISCONNECTED) == 0) {
3421 		error = ENOTCONN;
3422 		goto done2;
3423 	}
3424 	if (uap->offset < 0) {
3425 		error = EINVAL;
3426 		goto done2;
3427 	}
3428 	if (uap->nbytes == USER_ADDR_NULL) {
3429 		error = EINVAL;
3430 		goto done2;
3431 	}
3432 	if (uap->flags != 0) {
3433 		error = EINVAL;
3434 		goto done2;
3435 	}
3436 
3437 	context.vc_ucred = fp->fp_glob->fg_cred;
3438 
3439 #if CONFIG_MACF_SOCKET_SUBSET
3440 	/* JMM - fetch connected sockaddr? */
3441 	error = mac_socket_check_send(context.vc_ucred, so, NULL);
3442 	if (error) {
3443 		goto done2;
3444 	}
3445 #endif
3446 
3447 	/*
3448 	 * Get number of bytes to send
3449 	 * Should it applies to size of header and trailer?
3450 	 */
3451 	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3452 	if (error) {
3453 		goto done2;
3454 	}
3455 
3456 	/*
3457 	 * If specified, get the pointer to the sf_hdtr struct for
3458 	 * any headers/trailers.
3459 	 */
3460 	if (uap->hdtr != USER_ADDR_NULL) {
3461 		caddr_t hdtrp;
3462 
3463 		bzero(&user_hdtr, sizeof(user_hdtr));
3464 		if (is_p_64bit_process) {
3465 			hdtrp = (caddr_t)&user64_hdtr;
3466 			sizeof_hdtr = sizeof(user64_hdtr);
3467 		} else {
3468 			hdtrp = (caddr_t)&user32_hdtr;
3469 			sizeof_hdtr = sizeof(user32_hdtr);
3470 		}
3471 		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3472 		if (error) {
3473 			goto done2;
3474 		}
3475 		if (is_p_64bit_process) {
3476 			user_hdtr.headers = user64_hdtr.headers;
3477 			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3478 			user_hdtr.trailers = user64_hdtr.trailers;
3479 			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3480 		} else {
3481 			user_hdtr.headers = user32_hdtr.headers;
3482 			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3483 			user_hdtr.trailers = user32_hdtr.trailers;
3484 			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3485 		}
3486 
3487 		/*
3488 		 * Send any headers. Wimp out and use writev(2).
3489 		 */
3490 		if (user_hdtr.headers != USER_ADDR_NULL) {
3491 			bzero(&nuap, sizeof(struct writev_args));
3492 			nuap.fd = uap->s;
3493 			nuap.iovp = user_hdtr.headers;
3494 			nuap.iovcnt = user_hdtr.hdr_cnt;
3495 			error = writev_nocancel(p, &nuap, &writev_retval);
3496 			if (error) {
3497 				goto done2;
3498 			}
3499 			sbytes += writev_retval;
3500 		}
3501 	}
3502 
3503 	/*
3504 	 * Get the file size for 2 reasons:
3505 	 *  1. We don't want to allocate more mbufs than necessary
3506 	 *  2. We don't want to read past the end of file
3507 	 */
3508 	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3509 		goto done2;
3510 	}
3511 
3512 	/*
3513 	 * Simply read file data into a chain of mbufs that used with scatter
3514 	 * gather reads. We're not (yet?) setup to use zero copy external
3515 	 * mbufs that point to the file pages.
3516 	 */
3517 	socket_lock(so, 1);
3518 	error = sblock(&so->so_snd, SBL_WAIT);
3519 	if (error) {
3520 		socket_unlock(so, 1);
3521 		goto done2;
3522 	}
3523 	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3524 		mbuf_t  m0 = NULL, m;
3525 		unsigned int    nbufs = SFUIOBUFS, i;
3526 		uio_t   auio;
3527 		uio_stackbuf_t    uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3528 		size_t  uiolen;
3529 		user_ssize_t    rlen;
3530 		off_t   pgoff;
3531 		size_t  pktlen;
3532 		boolean_t jumbocl;
3533 
3534 		/*
3535 		 * Calculate the amount to transfer.
3536 		 * Align to round number of pages.
3537 		 * Not to exceed send socket buffer,
3538 		 * the EOF, or the passed in nbytes.
3539 		 */
3540 		xfsize = sbspace(&so->so_snd);
3541 
3542 		if (xfsize <= 0) {
3543 			if (so->so_state & SS_CANTSENDMORE) {
3544 				error = EPIPE;
3545 				goto done3;
3546 			} else if ((so->so_state & SS_NBIO)) {
3547 				error = EAGAIN;
3548 				goto done3;
3549 			} else {
3550 				xfsize = PAGE_SIZE;
3551 			}
3552 		}
3553 
3554 		if (xfsize > SENDFILE_MAX_BYTES) {
3555 			xfsize = SENDFILE_MAX_BYTES;
3556 		} else if (xfsize > PAGE_SIZE) {
3557 			xfsize = trunc_page(xfsize);
3558 		}
3559 		pgoff = off & PAGE_MASK_64;
3560 		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3561 			xfsize = PAGE_SIZE_64 - pgoff;
3562 		}
3563 		if (nbytes && xfsize > (nbytes - sbytes)) {
3564 			xfsize = nbytes - sbytes;
3565 		}
3566 		if (xfsize <= 0) {
3567 			break;
3568 		}
3569 		if (off + xfsize > file_size) {
3570 			xfsize = file_size - off;
3571 		}
3572 		if (xfsize <= 0) {
3573 			break;
3574 		}
3575 
3576 		/*
3577 		 * Attempt to use larger than system page-size clusters for
3578 		 * large writes only if there is a jumbo cluster pool and
3579 		 * if the socket is marked accordingly.
3580 		 */
3581 		jumbocl = sosendjcl && njcl > 0 &&
3582 		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3583 
3584 		socket_unlock(so, 0);
3585 		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3586 		pktlen = mbuf_pkthdr_maxlen(m0);
3587 		if (pktlen < (size_t)xfsize) {
3588 			xfsize = pktlen;
3589 		}
3590 
3591 		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3592 		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
3593 		if (auio == NULL) {
3594 			printf("sendfile failed. nbufs = %d. %s", nbufs,
3595 			    "File a radar related to rdar://10146739.\n");
3596 			mbuf_freem(m0);
3597 			error = ENXIO;
3598 			socket_lock(so, 0);
3599 			goto done3;
3600 		}
3601 
3602 		for (i = 0, m = m0, uiolen = 0;
3603 		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3604 		    i++, m = mbuf_next(m)) {
3605 			size_t mlen = mbuf_maxlen(m);
3606 
3607 			if (mlen + uiolen > (size_t)xfsize) {
3608 				mlen = xfsize - uiolen;
3609 			}
3610 			mbuf_setlen(m, mlen);
3611 			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3612 			    mlen);
3613 			uiolen += mlen;
3614 		}
3615 
3616 		if (xfsize != uio_resid(auio)) {
3617 			printf("sendfile: xfsize: %lld != uio_resid(auio): "
3618 			    "%lld\n", xfsize, (long long)uio_resid(auio));
3619 		}
3620 
3621 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3622 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3623 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3624 		error = fo_read(fp, auio, FOF_OFFSET, &context);
3625 		socket_lock(so, 0);
3626 		if (error != 0) {
3627 			if (uio_resid(auio) != xfsize && (error == ERESTART ||
3628 			    error == EINTR || error == EWOULDBLOCK)) {
3629 				error = 0;
3630 			} else {
3631 				mbuf_freem(m0);
3632 				goto done3;
3633 			}
3634 		}
3635 		xfsize -= uio_resid(auio);
3636 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3637 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3638 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3639 
3640 		if (xfsize == 0) {
3641 			// printf("sendfile: fo_read 0 bytes, EOF\n");
3642 			break;
3643 		}
3644 		if (xfsize + off > file_size) {
3645 			printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3646 			    "%lld\n", xfsize, off, file_size);
3647 		}
3648 		for (i = 0, m = m0, rlen = 0;
3649 		    i < nbufs && m != NULL && rlen < xfsize;
3650 		    i++, m = mbuf_next(m)) {
3651 			size_t mlen = mbuf_maxlen(m);
3652 
3653 			if (rlen + mlen > (size_t)xfsize) {
3654 				mlen = xfsize - rlen;
3655 			}
3656 			mbuf_setlen(m, mlen);
3657 
3658 			rlen += mlen;
3659 		}
3660 		mbuf_pkthdr_setlen(m0, xfsize);
3661 
3662 retry_space:
3663 		/*
3664 		 * Make sure that the socket is still able to take more data.
3665 		 * CANTSENDMORE being true usually means that the connection
3666 		 * was closed. so_error is true when an error was sensed after
3667 		 * a previous send.
3668 		 * The state is checked after the page mapping and buffer
3669 		 * allocation above since those operations may block and make
3670 		 * any socket checks stale. From this point forward, nothing
3671 		 * blocks before the pru_send (or more accurately, any blocking
3672 		 * results in a loop back to here to re-check).
3673 		 */
3674 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3675 			if (so->so_state & SS_CANTSENDMORE) {
3676 				error = EPIPE;
3677 			} else {
3678 				error = so->so_error;
3679 				so->so_error = 0;
3680 			}
3681 			m_freem(m0);
3682 			goto done3;
3683 		}
3684 		/*
3685 		 * Wait for socket space to become available. We do this just
3686 		 * after checking the connection state above in order to avoid
3687 		 * a race condition with sbwait().
3688 		 */
3689 		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3690 			if (so->so_state & SS_NBIO) {
3691 				m_freem(m0);
3692 				error = EAGAIN;
3693 				goto done3;
3694 			}
3695 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3696 			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3697 			error = sbwait(&so->so_snd);
3698 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3699 			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3700 			/*
3701 			 * An error from sbwait usually indicates that we've
3702 			 * been interrupted by a signal. If we've sent anything
3703 			 * then return bytes sent, otherwise return the error.
3704 			 */
3705 			if (error) {
3706 				m_freem(m0);
3707 				goto done3;
3708 			}
3709 			goto retry_space;
3710 		}
3711 
3712 		struct mbuf *control = NULL;
3713 		{
3714 			/*
3715 			 * Socket filter processing
3716 			 */
3717 
3718 			error = sflt_data_out(so, NULL, &m0, &control, 0);
3719 			if (error) {
3720 				if (error == EJUSTRETURN) {
3721 					error = 0;
3722 					continue;
3723 				}
3724 				goto done3;
3725 			}
3726 			/*
3727 			 * End Socket filter processing
3728 			 */
3729 		}
3730 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3731 		    uap->s, 0, 0, 0, 0);
3732 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3733 		    0, control, p);
3734 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3735 		    uap->s, 0, 0, 0, 0);
3736 		if (error) {
3737 			goto done3;
3738 		}
3739 	}
3740 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3741 	/*
3742 	 * Send trailers. Wimp out and use writev(2).
3743 	 */
3744 	if (uap->hdtr != USER_ADDR_NULL &&
3745 	    user_hdtr.trailers != USER_ADDR_NULL) {
3746 		bzero(&nuap, sizeof(struct writev_args));
3747 		nuap.fd = uap->s;
3748 		nuap.iovp = user_hdtr.trailers;
3749 		nuap.iovcnt = user_hdtr.trl_cnt;
3750 		error = writev_nocancel(p, &nuap, &writev_retval);
3751 		if (error) {
3752 			goto done2;
3753 		}
3754 		sbytes += writev_retval;
3755 	}
3756 done2:
3757 	file_drop(uap->s);
3758 done1:
3759 	file_drop(uap->fd);
3760 done:
3761 	if (uap->nbytes != USER_ADDR_NULL) {
3762 		/* XXX this appears bogus for some early failure conditions */
3763 		copyout(&sbytes, uap->nbytes, sizeof(off_t));
3764 	}
3765 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3766 	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3767 	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3768 	return error;
3769 done3:
3770 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
3771 	goto done2;
3772 }
3773 
3774 
3775 #endif /* SENDFILE */
3776