xref: /xnu-10063.121.3/bsd/kern/uipc_syscalls.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * sendfile(2) and related extensions:
33  * Copyright (c) 1998, David Greenman. All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64  */
65 /*
66  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67  * support for mandatory and extensible security protections.  This notice
68  * is included in support of clause 2.2 (b) of the Apple Public License,
69  * Version 2.0.
70  */
71 
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 #include <sys/types.h>
96 
97 #include <security/audit/audit.h>
98 
99 #include <sys/kdebug.h>
100 #include <sys/sysproto.h>
101 #include <netinet/in.h>
102 #include <net/route.h>
103 #include <netinet/in_pcb.h>
104 
105 #include <os/log.h>
106 #include <os/ptrtools.h>
107 
108 #include <os/log.h>
109 
110 #if CONFIG_MACF_SOCKET_SUBSET
111 #include <security/mac_framework.h>
112 #endif /* MAC_SOCKET_SUBSET */
113 
114 #include <net/sockaddr_utils.h>
115 
/*
 * Field shorthands: with these, fp->f_flag and fp->f_ops expand to
 * fp->fp_glob->fg_flag and fp->fp_glob->fg_ops respectively.
 */
#define f_flag                  fp_glob->fg_flag
#define f_ops                   fp_glob->fg_ops

/* kdebug codes (DBG_NETSOCK class): layer begin/end markers. */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)

/* kdebug codes (DBG_NETSOCK class): send side. */
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

/* kdebug codes (DBG_NETSOCK class): receive side. */
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))

/* kdebug codes (DBG_NETSOCK class): sendfile and its sub-phases. */
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))

/* kdebug codes (DBG_NETSOCK class): batched message variants. */
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))
135 
136 /* Forward declarations for referenced types */
137 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
138 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
139 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
140 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
141 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
142 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
143 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
144 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
145 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
146 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
147 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
148 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
149 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
150 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
151 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
152 
153 static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
154     int, int32_ref_t );
155 static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
156     int32_ref_t);
157 static int connectit(socket_ref_t, sockaddr_ref_t);
158 static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
159     size_t, boolean_t);
160 static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
161     user_addr_t, size_t, boolean_t);
162 #if SENDFILE
163 static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
164     boolean_t);
165 #endif /* SENDFILE */
166 static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
167 static int connectitx(socket_ref_t, sockaddr_ref_t,
168     sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
169     sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
170 static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
171     int_ref_t);
172 static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);
173 
174 static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
175     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
176 static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
177     u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);
178 
179 static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
180 static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
181 static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
182 static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
183     socklen_ref_t, int_ref_t, socket_ref_t);
184 
185 SYSCTL_DECL(_kern_ipc);
186 
187 #define SO_MAX_MSG_X_DEFAULT 256
188 
189 static u_int somaxsendmsgx = SO_MAX_MSG_X_DEFAULT;
190 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
191     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
192 
193 static u_int somaxrecvmsgx = SO_MAX_MSG_X_DEFAULT;
194 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
195     CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
196 
197 static u_int missingpktinfo = 0;
198 SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
199     CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");
200 
201 static int do_recvmsg_x_donttrunc = 0;
202 SYSCTL_INT(_kern_ipc, OID_AUTO, do_recvmsg_x_donttrunc,
203     CTLFLAG_RW | CTLFLAG_LOCKED, &do_recvmsg_x_donttrunc, 0, "");
204 
#if DEBUG || DEVELOPMENT
/* Read-write switch ("debug" under _kern_ipc); DBG_PRINTF logs only when non-zero. */
static int uipc_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &uipc_debug, 0, "");

/* On DEBUG/DEVELOPMENT kernels addresses are passed through unchanged. */
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)

/*
 * Log through os_log() when uipc_debug is set.
 *
 * The body is wrapped in do { } while (0) so that the macro is a single
 * statement: "if (c) DBG_PRINTF(...); else ..." parses the same way in
 * both build flavors, and the caller's "else" can never bind to the
 * macro's internal "if".
 */
#define DBG_PRINTF(...) do {                            \
	if (uipc_debug != 0) {                          \
	        os_log(OS_LOG_DEFAULT, __VA_ARGS__);    \
	}                                               \
} while (0)
#else
/* On other kernels addresses go through VM_KERNEL_ADDRPERM(). */
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
/* Debug logging compiles to an empty statement. */
#define DBG_PRINTF(...) do { } while (0)
#endif
218 
219 
220 /*
221  * Values for sendmsg_x_mode
222  * 0: default
223  * 1: sendit loop one at a time
224  * 2: old implementation
225  */
226 static u_int sendmsg_x_mode = 0;
227 SYSCTL_UINT(_kern_ipc, OID_AUTO, sendmsg_x_mode,
228     CTLFLAG_RW | CTLFLAG_LOCKED, &sendmsg_x_mode, 0, "");
229 
230 /*
231  * System call interface to the socket abstraction.
232  */
233 
234 extern const struct fileops socketops;
235 
236 /*
237  * Returns:	0			Success
238  *		EACCES			Mandatory Access Control failure
239  *	falloc:ENFILE
240  *	falloc:EMFILE
241  *	falloc:ENOMEM
242  *	socreate:EAFNOSUPPORT
243  *	socreate:EPROTOTYPE
244  *	socreate:EPROTONOSUPPORT
245  *	socreate:ENOBUFS
246  *	socreate:ENOMEM
247  *	socreate:???			[other protocol families, IPSEC]
248  */
249 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)250 socket(proc_ref_t p,
251     struct socket_args *uap,
252     int32_ref_t retval)
253 {
254 	return socket_common(p, uap->domain, uap->type, uap->protocol,
255 	           proc_selfpid(), retval, 0);
256 }
257 
258 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)259 socket_delegate(proc_ref_t p,
260     struct socket_delegate_args *uap,
261     int32_ref_t retval)
262 {
263 	return socket_common(p, uap->domain, uap->type, uap->protocol,
264 	           uap->epid, retval, 1);
265 }
266 
267 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)268 socket_common(proc_ref_t p,
269     int domain,
270     int type,
271     int protocol,
272     pid_t epid,
273     int32_ref_t retval,
274     int delegate)
275 {
276 	socket_ref_t so;
277 	fileproc_ref_t  fp;
278 	int fd, error;
279 
280 	AUDIT_ARG(socket, domain, type, protocol);
281 #if CONFIG_MACF_SOCKET_SUBSET
282 	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
283 	    type, protocol)) != 0) {
284 		return error;
285 	}
286 #endif /* MAC_SOCKET_SUBSET */
287 
288 	if (delegate) {
289 		error = priv_check_cred(kauth_cred_get(),
290 		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
291 		if (error) {
292 			return EACCES;
293 		}
294 	}
295 
296 	error = falloc(p, &fp, &fd);
297 	if (error) {
298 		return error;
299 	}
300 	fp->f_flag = FREAD | FWRITE;
301 	fp->f_ops = &socketops;
302 
303 	if (delegate) {
304 		error = socreate_delegate(domain, &so, type, protocol, epid);
305 	} else {
306 		error = socreate(domain, &so, type, protocol);
307 	}
308 
309 	if (error) {
310 		fp_free(p, fd, fp);
311 	} else {
312 		fp_set_data(fp, so);
313 
314 		proc_fdlock(p);
315 		procfdtbl_releasefd(p, fd, NULL);
316 
317 		if (ENTR_SHOULDTRACE) {
318 			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
319 			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
320 		}
321 		fp_drop(p, fd, fp, 1);
322 		proc_fdunlock(p);
323 
324 		*retval = fd;
325 	}
326 	return error;
327 }
328 
329 /*
330  * Returns:	0			Success
331  *		EDESTADDRREQ		Destination address required
332  *		EBADF			Bad file descriptor
333  *		EACCES			Mandatory Access Control failure
334  *	file_socket:ENOTSOCK
335  *	file_socket:EBADF
336  *	getsockaddr:ENAMETOOLONG	Filename too long
337  *	getsockaddr:EINVAL		Invalid argument
338  *	getsockaddr:ENOMEM		Not enough space
339  *	getsockaddr:EFAULT		Bad address
340  *	sobindlock:???
341  */
342 /* ARGSUSED */
343 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)344 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
345 {
346 	struct sockaddr_storage ss;
347 	sockaddr_ref_t  sa = NULL;
348 	socket_ref_t so;
349 	boolean_t want_free = TRUE;
350 	int error;
351 
352 	AUDIT_ARG(fd, uap->s);
353 	error = file_socket(uap->s, &so);
354 	if (error != 0) {
355 		return error;
356 	}
357 	if (so == NULL) {
358 		error = EBADF;
359 		goto out;
360 	}
361 	if (uap->name == USER_ADDR_NULL) {
362 		error = EDESTADDRREQ;
363 		goto out;
364 	}
365 	if (uap->namelen > sizeof(ss)) {
366 		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
367 	} else {
368 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
369 		if (error == 0) {
370 			sa = SA(&ss);
371 			want_free = FALSE;
372 		}
373 	}
374 	if (error != 0) {
375 		goto out;
376 	}
377 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
378 #if CONFIG_MACF_SOCKET_SUBSET
379 	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
380 	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
381 		error = sobindlock(so, sa, 1);  /* will lock socket */
382 	}
383 #else
384 	error = sobindlock(so, sa, 1);          /* will lock socket */
385 #endif /* MAC_SOCKET_SUBSET */
386 	if (want_free) {
387 		free_sockaddr(sa);
388 	}
389 out:
390 	file_drop(uap->s);
391 	return error;
392 }
393 
394 /*
395  * Returns:	0			Success
396  *		EBADF
397  *		EACCES			Mandatory Access Control failure
398  *	file_socket:ENOTSOCK
399  *	file_socket:EBADF
400  *	solisten:EINVAL
401  *	solisten:EOPNOTSUPP
402  *	solisten:???
403  */
404 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)405 listen(__unused proc_ref_t p, struct listen_args *uap,
406     __unused int32_ref_t retval)
407 {
408 	int error;
409 	socket_ref_t so;
410 
411 	AUDIT_ARG(fd, uap->s);
412 	error = file_socket(uap->s, &so);
413 	if (error) {
414 		return error;
415 	}
416 	if (so != NULL)
417 #if CONFIG_MACF_SOCKET_SUBSET
418 	{
419 		error = mac_socket_check_listen(kauth_cred_get(), so);
420 		if (error == 0) {
421 			error = solisten(so, uap->backlog);
422 		}
423 	}
424 #else
425 	{ error = solisten(so, uap->backlog);}
426 #endif /* MAC_SOCKET_SUBSET */
427 	else {
428 		error = EBADF;
429 	}
430 
431 	file_drop(uap->s);
432 	return error;
433 }
434 
435 /*
436  * Returns:	fp_get_ftype:EBADF	Bad file descriptor
437  *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
438  *		:EFAULT			Bad address on copyin/copyout
439  *		:EBADF			Bad file descriptor
440  *		:EOPNOTSUPP		Operation not supported on socket
441  *		:EINVAL			Invalid argument
442  *		:EWOULDBLOCK		Operation would block
443  *		:ECONNABORTED		Connection aborted
444  *		:EINTR			Interrupted function
445  *		:EACCES			Mandatory Access Control failure
446  *		falloc:ENFILE		Too many files open in system
447  *		falloc:EMFILE		Too many open files
448  *		falloc:ENOMEM		Not enough space
449  *		0			Success
450  */
451 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)452 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
453     int32_ref_t retval)
454 {
455 	fileproc_ref_t  fp;
456 	sockaddr_ref_t  sa = NULL;
457 	socklen_t namelen;
458 	int error;
459 	socket_ref_t  head;
460 	socket_ref_t so = NULL;
461 	lck_mtx_t *mutex_held;
462 	int fd = uap->s;
463 	int newfd;
464 	unsigned int fflag;
465 	int dosocklock = 0;
466 
467 	*retval = -1;
468 
469 	AUDIT_ARG(fd, uap->s);
470 
471 	if (uap->name) {
472 		error = copyin(uap->anamelen, (caddr_t)&namelen,
473 		    sizeof(socklen_t));
474 		if (error) {
475 			return error;
476 		}
477 	}
478 	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
479 	if (error) {
480 		return error;
481 	}
482 	head = (struct socket *)fp_get_data(fp);
483 
484 #if CONFIG_MACF_SOCKET_SUBSET
485 	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
486 		goto out;
487 	}
488 #endif /* MAC_SOCKET_SUBSET */
489 
490 	socket_lock(head, 1);
491 
492 	if (head->so_proto->pr_getlock != NULL) {
493 		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
494 		dosocklock = 1;
495 	} else {
496 		mutex_held = head->so_proto->pr_domain->dom_mtx;
497 		dosocklock = 0;
498 	}
499 
500 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
501 		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
502 			error = EOPNOTSUPP;
503 		} else {
504 			/* POSIX: The socket is not accepting connections */
505 			error = EINVAL;
506 		}
507 		socket_unlock(head, 1);
508 		goto out;
509 	}
510 check_again:
511 	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
512 		socket_unlock(head, 1);
513 		error = EWOULDBLOCK;
514 		goto out;
515 	}
516 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
517 		if (head->so_state & SS_CANTRCVMORE) {
518 			head->so_error = ECONNABORTED;
519 			break;
520 		}
521 		if (head->so_usecount < 1) {
522 			panic("accept: head=%p refcount=%d", head,
523 			    head->so_usecount);
524 		}
525 		error = msleep((caddr_t)&head->so_timeo, mutex_held,
526 		    PSOCK | PCATCH, "accept", 0);
527 		if (head->so_usecount < 1) {
528 			panic("accept: 2 head=%p refcount=%d", head,
529 			    head->so_usecount);
530 		}
531 		if ((head->so_state & SS_DRAINING)) {
532 			error = ECONNABORTED;
533 		}
534 		if (error) {
535 			socket_unlock(head, 1);
536 			goto out;
537 		}
538 	}
539 	if (head->so_error) {
540 		error = head->so_error;
541 		head->so_error = 0;
542 		socket_unlock(head, 1);
543 		goto out;
544 	}
545 
546 	/*
547 	 * At this point we know that there is at least one connection
548 	 * ready to be accepted. Remove it from the queue prior to
549 	 * allocating the file descriptor for it since falloc() may
550 	 * block allowing another process to accept the connection
551 	 * instead.
552 	 */
553 	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
554 
555 	so_acquire_accept_list(head, NULL);
556 	if (TAILQ_EMPTY(&head->so_comp)) {
557 		so_release_accept_list(head);
558 		goto check_again;
559 	}
560 
561 	so = TAILQ_FIRST(&head->so_comp);
562 	TAILQ_REMOVE(&head->so_comp, so, so_list);
563 	/*
564 	 * Acquire the lock of the new connection
565 	 * as we may be in the process of receiving
566 	 * a packet that may change its so_state
567 	 * (e.g.: a TCP FIN).
568 	 */
569 	if (dosocklock) {
570 		socket_lock(so, 0);
571 	}
572 	so->so_head = NULL;
573 	so->so_state &= ~SS_COMP;
574 	if (dosocklock) {
575 		socket_unlock(so, 0);
576 	}
577 	head->so_qlen--;
578 	so_release_accept_list(head);
579 
580 	/* unlock head to avoid deadlock with select, keep a ref on head */
581 	socket_unlock(head, 0);
582 
583 #if CONFIG_MACF_SOCKET_SUBSET
584 	/*
585 	 * Pass the pre-accepted socket to the MAC framework. This is
586 	 * cheaper than allocating a file descriptor for the socket,
587 	 * calling the protocol accept callback, and possibly freeing
588 	 * the file descriptor should the MAC check fails.
589 	 */
590 	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
591 		socket_lock(so, 1);
592 		so->so_state &= ~SS_NOFDREF;
593 		socket_unlock(so, 1);
594 		soclose(so);
595 		/* Drop reference on listening socket */
596 		sodereference(head);
597 		goto out;
598 	}
599 #endif /* MAC_SOCKET_SUBSET */
600 
601 	/*
602 	 * Pass the pre-accepted socket to any interested socket filter(s).
603 	 * Upon failure, the socket would have been closed by the callee.
604 	 */
605 	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
606 		/* Drop reference on listening socket */
607 		sodereference(head);
608 		/* Propagate socket filter's error code to the caller */
609 		goto out;
610 	}
611 
612 	fflag = fp->f_flag;
613 	error = falloc(p, &fp, &newfd);
614 	if (error) {
615 		/*
616 		 * Probably ran out of file descriptors.
617 		 *
618 		 * <rdar://problem/8554930>
619 		 * Don't put this back on the socket like we used to, that
620 		 * just causes the client to spin. Drop the socket.
621 		 */
622 		socket_lock(so, 1);
623 		so->so_state &= ~SS_NOFDREF;
624 		socket_unlock(so, 1);
625 		soclose(so);
626 		sodereference(head);
627 		goto out;
628 	}
629 	*retval = newfd;
630 	fp->f_flag = fflag;
631 	fp->f_ops = &socketops;
632 	fp_set_data(fp, so);
633 
634 	socket_lock(head, 0);
635 	if (dosocklock) {
636 		socket_lock(so, 1);
637 	}
638 
639 	/* Sync socket non-blocking/async state with file flags */
640 	if (fp->f_flag & FNONBLOCK) {
641 		so->so_state |= SS_NBIO;
642 	} else {
643 		so->so_state &= ~SS_NBIO;
644 	}
645 
646 	if (fp->f_flag & FASYNC) {
647 		so->so_state |= SS_ASYNC;
648 		so->so_rcv.sb_flags |= SB_ASYNC;
649 		so->so_snd.sb_flags |= SB_ASYNC;
650 	} else {
651 		so->so_state &= ~SS_ASYNC;
652 		so->so_rcv.sb_flags &= ~SB_ASYNC;
653 		so->so_snd.sb_flags &= ~SB_ASYNC;
654 	}
655 
656 	(void) soacceptlock(so, &sa, 0);
657 	socket_unlock(head, 1);
658 	if (sa == NULL) {
659 		namelen = 0;
660 		if (uap->name) {
661 			goto gotnoname;
662 		}
663 		error = 0;
664 		goto releasefd;
665 	}
666 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
667 
668 	if (uap->name) {
669 		socklen_t       sa_len;
670 
671 		/* save sa_len before it is destroyed */
672 		sa_len = sa->sa_len;
673 		namelen = MIN(namelen, sa_len);
674 		error = copyout(sa, uap->name, namelen);
675 		if (!error) {
676 			/* return the actual, untruncated address length */
677 			namelen = sa_len;
678 		}
679 gotnoname:
680 		error = copyout((caddr_t)&namelen, uap->anamelen,
681 		    sizeof(socklen_t));
682 	}
683 	free_sockaddr(sa);
684 
685 releasefd:
686 	/*
687 	 * If the socket has been marked as inactive by sosetdefunct(),
688 	 * disallow further operations on it.
689 	 */
690 	if (so->so_flags & SOF_DEFUNCT) {
691 		sodefunct(current_proc(), so,
692 		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
693 	}
694 
695 	if (dosocklock) {
696 		socket_unlock(so, 1);
697 	}
698 
699 	proc_fdlock(p);
700 	procfdtbl_releasefd(p, newfd, NULL);
701 	fp_drop(p, newfd, fp, 1);
702 	proc_fdunlock(p);
703 
704 out:
705 	if (error == 0 && ENTR_SHOULDTRACE) {
706 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
707 		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
708 	}
709 
710 	file_drop(fd);
711 	return error;
712 }
713 
714 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)715 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
716 {
717 	__pthread_testcancel(1);
718 	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
719 	           retval);
720 }
721 
722 /*
723  * Returns:	0			Success
724  *		EBADF			Bad file descriptor
725  *		EALREADY		Connection already in progress
726  *		EINPROGRESS		Operation in progress
727  *		ECONNABORTED		Connection aborted
728  *		EINTR			Interrupted function
729  *		EACCES			Mandatory Access Control failure
730  *	file_socket:ENOTSOCK
731  *	file_socket:EBADF
732  *	getsockaddr:ENAMETOOLONG	Filename too long
733  *	getsockaddr:EINVAL		Invalid argument
734  *	getsockaddr:ENOMEM		Not enough space
735  *	getsockaddr:EFAULT		Bad address
736  *	soconnectlock:EOPNOTSUPP
737  *	soconnectlock:EISCONN
738  *	soconnectlock:???		[depends on protocol, filters]
739  *	msleep:EINTR
740  *
741  * Imputed:	so_error		error may be set from so_error, which
742  *					may have been set by soconnectlock.
743  */
744 /* ARGSUSED */
745 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)746 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
747 {
748 	__pthread_testcancel(1);
749 	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
750 	           retval);
751 }
752 
753 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)754 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
755 {
756 #pragma unused(p, retval)
757 	socket_ref_t so;
758 	struct sockaddr_storage ss;
759 	sockaddr_ref_t  sa = NULL;
760 	int error;
761 	int fd = uap->s;
762 	boolean_t dgram;
763 
764 	AUDIT_ARG(fd, uap->s);
765 	error = file_socket(fd, &so);
766 	if (error != 0) {
767 		return error;
768 	}
769 	if (so == NULL) {
770 		error = EBADF;
771 		goto out;
772 	}
773 
774 	/*
775 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
776 	 * if this is a datagram socket; translate for other types.
777 	 */
778 	dgram = (so->so_type == SOCK_DGRAM);
779 
780 	/* Get socket address now before we obtain socket lock */
781 	if (uap->namelen > sizeof(ss)) {
782 		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
783 	} else {
784 		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
785 		if (error == 0) {
786 			sa = SA(&ss);
787 		}
788 	}
789 	if (error != 0) {
790 		goto out;
791 	}
792 
793 	error = connectit(so, sa);
794 
795 	if (sa != NULL && sa != SA(&ss)) {
796 		free_sockaddr(sa);
797 	}
798 	if (error == ERESTART) {
799 		error = EINTR;
800 	}
801 out:
802 	file_drop(fd);
803 	return error;
804 }
805 
806 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)807 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
808 {
809 #pragma unused(p, retval)
810 	struct sockaddr_storage ss, sd;
811 	sockaddr_ref_t  src = NULL, dst = NULL;
812 	socket_ref_t so;
813 	int error, error1, fd = uap->socket;
814 	boolean_t dgram;
815 	sae_connid_t cid = SAE_CONNID_ANY;
816 	struct user32_sa_endpoints ep32;
817 	struct user64_sa_endpoints ep64;
818 	struct user_sa_endpoints ep;
819 	user_ssize_t bytes_written = 0;
820 	struct user_iovec *iovp;
821 	uio_t auio = NULL;
822 
823 	AUDIT_ARG(fd, uap->socket);
824 	error = file_socket(fd, &so);
825 	if (error != 0) {
826 		return error;
827 	}
828 	if (so == NULL) {
829 		error = EBADF;
830 		goto out;
831 	}
832 
833 	if (uap->endpoints == USER_ADDR_NULL) {
834 		error = EINVAL;
835 		goto out;
836 	}
837 
838 	if (IS_64BIT_PROCESS(p)) {
839 		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
840 		if (error != 0) {
841 			goto out;
842 		}
843 
844 		ep.sae_srcif = ep64.sae_srcif;
845 		ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
846 		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
847 		ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
848 		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
849 	} else {
850 		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
851 		if (error != 0) {
852 			goto out;
853 		}
854 
855 		ep.sae_srcif = ep32.sae_srcif;
856 		ep.sae_srcaddr = ep32.sae_srcaddr;
857 		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
858 		ep.sae_dstaddr = ep32.sae_dstaddr;
859 		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
860 	}
861 
862 	/*
863 	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
864 	 * if this is a datagram socket; translate for other types.
865 	 */
866 	dgram = (so->so_type == SOCK_DGRAM);
867 
868 	/* Get socket address now before we obtain socket lock */
869 	if (ep.sae_srcaddr != USER_ADDR_NULL) {
870 		if (ep.sae_srcaddrlen > sizeof(ss)) {
871 			error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
872 		} else {
873 			error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
874 			if (error == 0) {
875 				src = SA(&ss);
876 			}
877 		}
878 
879 		if (error) {
880 			goto out;
881 		}
882 	}
883 
884 	if (ep.sae_dstaddr == USER_ADDR_NULL) {
885 		error = EINVAL;
886 		goto out;
887 	}
888 
889 	/* Get socket address now before we obtain socket lock */
890 	if (ep.sae_dstaddrlen > sizeof(sd)) {
891 		error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
892 	} else {
893 		error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
894 		if (error == 0) {
895 			dst = SA(&sd);
896 		}
897 	}
898 
899 	if (error) {
900 		goto out;
901 	}
902 
903 	VERIFY(dst != NULL);
904 
905 	if (uap->iov != USER_ADDR_NULL) {
906 		/* Verify range before calling uio_create() */
907 		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
908 			error = EINVAL;
909 			goto out;
910 		}
911 
912 		if (uap->len == USER_ADDR_NULL) {
913 			error = EINVAL;
914 			goto out;
915 		}
916 
917 		/* allocate a uio to hold the number of iovecs passed */
918 		auio = uio_create(uap->iovcnt, 0,
919 		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
920 		    UIO_WRITE);
921 
922 		if (auio == NULL) {
923 			error = ENOMEM;
924 			goto out;
925 		}
926 
927 		/*
928 		 * get location of iovecs within the uio.
929 		 * then copyin the iovecs from user space.
930 		 */
931 		iovp = uio_iovsaddr_user(auio);
932 		if (iovp == NULL) {
933 			error = ENOMEM;
934 			goto out;
935 		}
936 		error = copyin_user_iovec_array(uap->iov,
937 		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
938 		    uap->iovcnt, iovp);
939 		if (error != 0) {
940 			goto out;
941 		}
942 
943 		/* finish setup of uio_t */
944 		error = uio_calculateresid_user(auio);
945 		if (error != 0) {
946 			goto out;
947 		}
948 	}
949 
950 	error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
951 	    &cid, auio, uap->flags, &bytes_written);
952 	if (error == ERESTART) {
953 		error = EINTR;
954 	}
955 
956 	if (uap->len != USER_ADDR_NULL) {
957 		if (IS_64BIT_PROCESS(p)) {
958 			error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
959 		} else {
960 			error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
961 		}
962 		/* give precedence to connectitx errors */
963 		if ((error1 != 0) && (error == 0)) {
964 			error = error1;
965 		}
966 	}
967 
968 	if (uap->connid != USER_ADDR_NULL) {
969 		error1 = copyout(&cid, uap->connid, sizeof(cid));
970 		/* give precedence to connectitx errors */
971 		if ((error1 != 0) && (error == 0)) {
972 			error = error1;
973 		}
974 	}
975 out:
976 	file_drop(fd);
977 	if (auio != NULL) {
978 		uio_free(auio);
979 	}
980 	if (src != NULL && src != SA(&ss)) {
981 		free_sockaddr(src);
982 	}
983 	if (dst != NULL && dst != SA(&sd)) {
984 		free_sockaddr(dst);
985 	}
986 	return error;
987 }
988 
989 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)990 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
991 {
992 	/*
993 	 * Due to similiarity with a POSIX interface, define as
994 	 * an unofficial cancellation point.
995 	 */
996 	__pthread_testcancel(1);
997 	return connectx_nocancel(p, uap, retval);
998 }
999 
1000 static int
connectit(struct socket * so,sockaddr_ref_t sa)1001 connectit(struct socket *so, sockaddr_ref_t sa)
1002 {
1003 	int error;
1004 
1005 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
1006 #if CONFIG_MACF_SOCKET_SUBSET
1007 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
1008 		return error;
1009 	}
1010 #endif /* MAC_SOCKET_SUBSET */
1011 
1012 	socket_lock(so, 1);
1013 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1014 		error = EALREADY;
1015 		goto out;
1016 	}
1017 	error = soconnectlock(so, sa, 0);
1018 	if (error != 0) {
1019 		goto out;
1020 	}
1021 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1022 		error = EINPROGRESS;
1023 		goto out;
1024 	}
1025 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1026 		lck_mtx_t *mutex_held;
1027 
1028 		if (so->so_proto->pr_getlock != NULL) {
1029 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1030 		} else {
1031 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1032 		}
1033 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1034 		    PSOCK | PCATCH, __func__, 0);
1035 		if (so->so_state & SS_DRAINING) {
1036 			error = ECONNABORTED;
1037 		}
1038 		if (error != 0) {
1039 			break;
1040 		}
1041 	}
1042 	if (error == 0) {
1043 		error = so->so_error;
1044 		so->so_error = 0;
1045 	}
1046 out:
1047 	socket_unlock(so, 1);
1048 	return error;
1049 }
1050 
1051 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1052 connectitx(struct socket *so, sockaddr_ref_t src,
1053     sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1054     sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1055     user_ssize_t *bytes_written)
1056 {
1057 	int error;
1058 
1059 	VERIFY(dst != NULL);
1060 
1061 	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1062 #if CONFIG_MACF_SOCKET_SUBSET
1063 	if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1064 		return error;
1065 	}
1066 
1067 	if (auio != NULL) {
1068 		if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1069 			return error;
1070 		}
1071 	}
1072 #endif /* MAC_SOCKET_SUBSET */
1073 
1074 	socket_lock(so, 1);
1075 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1076 		error = EALREADY;
1077 		goto out;
1078 	}
1079 
1080 	error = soconnectxlocked(so, src, dst, p, ifscope,
1081 	    aid, pcid, flags, NULL, 0, auio, bytes_written);
1082 	if (error != 0) {
1083 		goto out;
1084 	}
1085 	/*
1086 	 * If, after the call to soconnectxlocked the flag is still set (in case
1087 	 * data has been queued and the connect() has actually been triggered,
1088 	 * it will have been unset by the transport), we exit immediately. There
1089 	 * is no reason to wait on any event.
1090 	 */
1091 	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1092 		error = 0;
1093 		goto out;
1094 	}
1095 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1096 		error = EINPROGRESS;
1097 		goto out;
1098 	}
1099 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1100 		lck_mtx_t *mutex_held;
1101 
1102 		if (so->so_proto->pr_getlock != NULL) {
1103 			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1104 		} else {
1105 			mutex_held = so->so_proto->pr_domain->dom_mtx;
1106 		}
1107 		error = msleep((caddr_t)&so->so_timeo, mutex_held,
1108 		    PSOCK | PCATCH, __func__, 0);
1109 		if (so->so_state & SS_DRAINING) {
1110 			error = ECONNABORTED;
1111 		}
1112 		if (error != 0) {
1113 			break;
1114 		}
1115 	}
1116 	if (error == 0) {
1117 		error = so->so_error;
1118 		so->so_error = 0;
1119 	}
1120 out:
1121 	socket_unlock(so, 1);
1122 	return error;
1123 }
1124 
1125 int
peeloff(proc_ref_t p,struct peeloff_args * uap,int * retval)1126 peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
1127 {
1128 #pragma unused(p, uap, retval)
1129 	/*
1130 	 * Due to similiarity with a POSIX interface, define as
1131 	 * an unofficial cancellation point.
1132 	 */
1133 	__pthread_testcancel(1);
1134 	return 0;
1135 }
1136 
1137 int
disconnectx(proc_ref_t p,struct disconnectx_args * uap,int * retval)1138 disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1139 {
1140 	/*
1141 	 * Due to similiarity with a POSIX interface, define as
1142 	 * an unofficial cancellation point.
1143 	 */
1144 	__pthread_testcancel(1);
1145 	return disconnectx_nocancel(p, uap, retval);
1146 }
1147 
1148 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1149 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1150 {
1151 #pragma unused(p, retval)
1152 	socket_ref_t so;
1153 	int fd = uap->s;
1154 	int error;
1155 
1156 	error = file_socket(fd, &so);
1157 	if (error != 0) {
1158 		return error;
1159 	}
1160 	if (so == NULL) {
1161 		error = EBADF;
1162 		goto out;
1163 	}
1164 
1165 	error = sodisconnectx(so, uap->aid, uap->cid);
1166 out:
1167 	file_drop(fd);
1168 	return error;
1169 }
1170 
1171 /*
1172  * Returns:	0			Success
1173  *	socreate:EAFNOSUPPORT
1174  *	socreate:EPROTOTYPE
1175  *	socreate:EPROTONOSUPPORT
1176  *	socreate:ENOBUFS
1177  *	socreate:ENOMEM
1178  *	socreate:EISCONN
1179  *	socreate:???			[other protocol families, IPSEC]
1180  *	falloc:ENFILE
1181  *	falloc:EMFILE
1182  *	falloc:ENOMEM
1183  *	copyout:EFAULT
1184  *	soconnect2:EINVAL
1185  *	soconnect2:EPROTOTYPE
1186  *	soconnect2:???			[other protocol families[
1187  */
1188 int
socketpair(proc_ref_t p,struct socketpair_args * uap,__unused int32_ref_t retval)1189 socketpair(proc_ref_t p, struct socketpair_args *uap,
1190     __unused int32_ref_t retval)
1191 {
1192 	fileproc_ref_t  fp1, fp2;
1193 	socket_ref_t so1, so2;
1194 	int fd, error, sv[2];
1195 
1196 	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1197 	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1198 	if (error) {
1199 		return error;
1200 	}
1201 	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1202 	if (error) {
1203 		goto free1;
1204 	}
1205 
1206 	error = falloc(p, &fp1, &fd);
1207 	if (error) {
1208 		goto free2;
1209 	}
1210 	fp1->f_flag = FREAD | FWRITE;
1211 	fp1->f_ops = &socketops;
1212 	fp_set_data(fp1, so1);
1213 	sv[0] = fd;
1214 
1215 	error = falloc(p, &fp2, &fd);
1216 	if (error) {
1217 		goto free3;
1218 	}
1219 	fp2->f_flag = FREAD | FWRITE;
1220 	fp2->f_ops = &socketops;
1221 	fp_set_data(fp2, so2);
1222 	sv[1] = fd;
1223 
1224 	error = soconnect2(so1, so2);
1225 	if (error) {
1226 		goto free4;
1227 	}
1228 	if (uap->type == SOCK_DGRAM) {
1229 		/*
1230 		 * Datagram socket connection is asymmetric.
1231 		 */
1232 		error = soconnect2(so2, so1);
1233 		if (error) {
1234 			goto free4;
1235 		}
1236 	}
1237 
1238 	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1239 		goto free4;
1240 	}
1241 
1242 	proc_fdlock(p);
1243 	procfdtbl_releasefd(p, sv[0], NULL);
1244 	procfdtbl_releasefd(p, sv[1], NULL);
1245 	fp_drop(p, sv[0], fp1, 1);
1246 	fp_drop(p, sv[1], fp2, 1);
1247 	proc_fdunlock(p);
1248 
1249 	return 0;
1250 free4:
1251 	fp_free(p, sv[1], fp2);
1252 free3:
1253 	fp_free(p, sv[0], fp1);
1254 free2:
1255 	(void) soclose(so2);
1256 free1:
1257 	(void) soclose(so1);
1258 	return error;
1259 }
1260 
1261 /*
1262  * Returns:	0			Success
1263  *		EINVAL
1264  *		ENOBUFS
1265  *		EBADF
1266  *		EPIPE
1267  *		EACCES			Mandatory Access Control failure
1268  *	file_socket:ENOTSOCK
1269  *	file_socket:EBADF
1270  *	getsockaddr:ENAMETOOLONG	Filename too long
1271  *	getsockaddr:EINVAL		Invalid argument
1272  *	getsockaddr:ENOMEM		Not enough space
1273  *	getsockaddr:EFAULT		Bad address
1274  *	<pru_sosend>:EACCES[TCP]
1275  *	<pru_sosend>:EADDRINUSE[TCP]
1276  *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1277  *	<pru_sosend>:EAFNOSUPPORT[TCP]
1278  *	<pru_sosend>:EAGAIN[TCP]
1279  *	<pru_sosend>:EBADF
1280  *	<pru_sosend>:ECONNRESET[TCP]
1281  *	<pru_sosend>:EFAULT
1282  *	<pru_sosend>:EHOSTUNREACH[TCP]
1283  *	<pru_sosend>:EINTR
1284  *	<pru_sosend>:EINVAL
1285  *	<pru_sosend>:EISCONN[AF_INET]
1286  *	<pru_sosend>:EMSGSIZE[TCP]
1287  *	<pru_sosend>:ENETDOWN[TCP]
1288  *	<pru_sosend>:ENETUNREACH[TCP]
1289  *	<pru_sosend>:ENOBUFS
1290  *	<pru_sosend>:ENOMEM[TCP]
1291  *	<pru_sosend>:ENOTCONN[AF_INET]
1292  *	<pru_sosend>:EOPNOTSUPP
1293  *	<pru_sosend>:EPERM[TCP]
1294  *	<pru_sosend>:EPIPE
1295  *	<pru_sosend>:EWOULDBLOCK
1296  *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1297  *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1298  *	<pru_sosend>:???		[value from so_error]
1299  *	sockargs:???
1300  */
1301 static int
sendit(proc_ref_t p,struct socket * so,user_msghdr_ref_t mp,uio_t uiop,int flags,int32_ref_t retval)1302 sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
1303     int flags, int32_ref_t retval)
1304 {
1305 	mbuf_ref_t  control = NULL;
1306 	struct sockaddr_storage ss;
1307 	sockaddr_ref_t  to = NULL;
1308 	boolean_t want_free = TRUE;
1309 	int error;
1310 	user_ssize_t len;
1311 
1312 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1313 
1314 	if (mp->msg_name != USER_ADDR_NULL) {
1315 		if (mp->msg_namelen > sizeof(ss)) {
1316 			error = getsockaddr(so, &to, mp->msg_name,
1317 			    mp->msg_namelen, TRUE);
1318 		} else {
1319 			error = getsockaddr_s(so, &ss, mp->msg_name,
1320 			    mp->msg_namelen, TRUE);
1321 			if (error == 0) {
1322 				to = SA(&ss);
1323 				want_free = FALSE;
1324 			}
1325 		}
1326 		if (error != 0) {
1327 			goto out;
1328 		}
1329 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1330 	}
1331 	if (mp->msg_control != USER_ADDR_NULL) {
1332 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1333 			error = EINVAL;
1334 			goto bad;
1335 		}
1336 		error = sockargs(&control, mp->msg_control,
1337 		    mp->msg_controllen, MT_CONTROL);
1338 		if (error != 0) {
1339 			goto bad;
1340 		}
1341 	}
1342 
1343 #if CONFIG_MACF_SOCKET_SUBSET
1344 	/*
1345 	 * We check the state without holding the socket lock;
1346 	 * if a race condition occurs, it would simply result
1347 	 * in an extra call to the MAC check function.
1348 	 */
1349 	if (to != NULL &&
1350 	    !(so->so_state & SS_DEFUNCT) &&
1351 	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1352 		if (control != NULL) {
1353 			m_freem(control);
1354 		}
1355 
1356 		goto bad;
1357 	}
1358 #endif /* MAC_SOCKET_SUBSET */
1359 
1360 	len = uio_resid(uiop);
1361 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1362 	    control, flags);
1363 	if (error != 0) {
1364 		if (uio_resid(uiop) != len && (error == ERESTART ||
1365 		    error == EINTR || error == EWOULDBLOCK)) {
1366 			error = 0;
1367 		}
1368 		/* Generation of SIGPIPE can be controlled per socket */
1369 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1370 		    !(flags & MSG_NOSIGNAL)) {
1371 			psignal(p, SIGPIPE);
1372 		}
1373 	}
1374 	if (error == 0) {
1375 		*retval = (int)(len - uio_resid(uiop));
1376 	}
1377 bad:
1378 	if (want_free) {
1379 		free_sockaddr(to);
1380 	}
1381 out:
1382 	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1383 
1384 	return error;
1385 }
1386 
1387 /*
1388  * Returns:	0			Success
1389  *		ENOMEM
1390  *	sendit:???			[see sendit definition in this file]
1391  *	write:???			[4056224: applicable for pipes]
1392  */
1393 int
sendto(proc_ref_t p,struct sendto_args * uap,int32_ref_t retval)1394 sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1395 {
1396 	__pthread_testcancel(1);
1397 	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1398 }
1399 
1400 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1401 sendto_nocancel(proc_ref_t p,
1402     struct sendto_nocancel_args *uap,
1403     int32_ref_t retval)
1404 {
1405 	struct user_msghdr msg;
1406 	int error;
1407 	uio_t auio = NULL;
1408 	socket_ref_t so;
1409 
1410 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1411 	AUDIT_ARG(fd, uap->s);
1412 
1413 	if (uap->flags & MSG_SKIPCFIL) {
1414 		error = EPERM;
1415 		goto done;
1416 	}
1417 
1418 	if (uap->len > LONG_MAX) {
1419 		error = EINVAL;
1420 		goto done;
1421 	}
1422 
1423 	auio = uio_create(1, 0,
1424 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1425 	    UIO_WRITE);
1426 	if (auio == NULL) {
1427 		error = ENOMEM;
1428 		goto done;
1429 	}
1430 	uio_addiov(auio, uap->buf, uap->len);
1431 
1432 	msg.msg_name = uap->to;
1433 	msg.msg_namelen = uap->tolen;
1434 	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1435 	msg.msg_iov = 0;
1436 	msg.msg_iovlen = 0;
1437 	msg.msg_control = 0;
1438 	msg.msg_flags = 0;
1439 
1440 	error = file_socket(uap->s, &so);
1441 	if (error) {
1442 		goto done;
1443 	}
1444 
1445 	if (so == NULL) {
1446 		error = EBADF;
1447 	} else {
1448 		error = sendit(p, so, &msg, auio, uap->flags, retval);
1449 	}
1450 
1451 	file_drop(uap->s);
1452 done:
1453 	if (auio != NULL) {
1454 		uio_free(auio);
1455 	}
1456 
1457 	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1458 
1459 	return error;
1460 }
1461 
1462 /*
1463  * Returns:	0			Success
1464  *		ENOBUFS
1465  *	copyin:EFAULT
1466  *	sendit:???			[see sendit definition in this file]
1467  */
1468 int
sendmsg(proc_ref_t p,struct sendmsg_args * uap,int32_ref_t retval)1469 sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1470 {
1471 	__pthread_testcancel(1);
1472 	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1473 	           retval);
1474 }
1475 
1476 int
sendmsg_nocancel(proc_ref_t p,struct sendmsg_nocancel_args * uap,int32_ref_t retval)1477 sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1478     int32_ref_t retval)
1479 {
1480 	struct user32_msghdr msg32;
1481 	struct user64_msghdr msg64;
1482 	struct user_msghdr user_msg;
1483 	caddr_t msghdrp;
1484 	int     size_of_msghdr;
1485 	int error;
1486 	uio_t auio = NULL;
1487 	struct user_iovec *iovp;
1488 	socket_ref_t so;
1489 
1490 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1491 
1492 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1493 	AUDIT_ARG(fd, uap->s);
1494 
1495 	if (uap->flags & MSG_SKIPCFIL) {
1496 		error = EPERM;
1497 		goto done;
1498 	}
1499 
1500 	if (is_p_64bit_process) {
1501 		msghdrp = (caddr_t)&msg64;
1502 		size_of_msghdr = sizeof(msg64);
1503 	} else {
1504 		msghdrp = (caddr_t)&msg32;
1505 		size_of_msghdr = sizeof(msg32);
1506 	}
1507 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1508 	if (error) {
1509 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1510 		return error;
1511 	}
1512 
1513 	if (is_p_64bit_process) {
1514 		user_msg.msg_flags = msg64.msg_flags;
1515 		user_msg.msg_controllen = msg64.msg_controllen;
1516 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
1517 		user_msg.msg_iovlen = msg64.msg_iovlen;
1518 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1519 		user_msg.msg_namelen = msg64.msg_namelen;
1520 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
1521 	} else {
1522 		user_msg.msg_flags = msg32.msg_flags;
1523 		user_msg.msg_controllen = msg32.msg_controllen;
1524 		user_msg.msg_control = msg32.msg_control;
1525 		user_msg.msg_iovlen = msg32.msg_iovlen;
1526 		user_msg.msg_iov = msg32.msg_iov;
1527 		user_msg.msg_namelen = msg32.msg_namelen;
1528 		user_msg.msg_name = msg32.msg_name;
1529 	}
1530 
1531 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1532 		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1533 		    0, 0, 0, 0);
1534 		return EMSGSIZE;
1535 	}
1536 
1537 	/* allocate a uio large enough to hold the number of iovecs passed */
1538 	auio = uio_create(user_msg.msg_iovlen, 0,
1539 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1540 	    UIO_WRITE);
1541 	if (auio == NULL) {
1542 		error = ENOBUFS;
1543 		goto done;
1544 	}
1545 
1546 	if (user_msg.msg_iovlen) {
1547 		/*
1548 		 * get location of iovecs within the uio.
1549 		 * then copyin the iovecs from user space.
1550 		 */
1551 		iovp = uio_iovsaddr_user(auio);
1552 		if (iovp == NULL) {
1553 			error = ENOBUFS;
1554 			goto done;
1555 		}
1556 		error = copyin_user_iovec_array(user_msg.msg_iov,
1557 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1558 		    user_msg.msg_iovlen, iovp);
1559 		if (error) {
1560 			goto done;
1561 		}
1562 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1563 
1564 		/* finish setup of uio_t */
1565 		error = uio_calculateresid_user(auio);
1566 		if (error) {
1567 			goto done;
1568 		}
1569 	} else {
1570 		user_msg.msg_iov = 0;
1571 	}
1572 
1573 	/* msg_flags is ignored for send */
1574 	user_msg.msg_flags = 0;
1575 
1576 	error = file_socket(uap->s, &so);
1577 	if (error) {
1578 		goto done;
1579 	}
1580 	if (so == NULL) {
1581 		error = EBADF;
1582 	} else {
1583 		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1584 	}
1585 	file_drop(uap->s);
1586 done:
1587 	if (auio != NULL) {
1588 		uio_free(auio);
1589 	}
1590 	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1591 
1592 	return error;
1593 }
1594 
1595 static int
internalize_user_msg_x(struct user_msghdr * user_msg,uio_t * auiop,proc_ref_t p,void_ptr_t user_msghdr_x_src)1596 internalize_user_msg_x(struct user_msghdr *user_msg, uio_t *auiop, proc_ref_t p, void_ptr_t user_msghdr_x_src)
1597 {
1598 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1599 	uio_t auio = *auiop;
1600 	int error;
1601 
1602 	if (is_p_64bit_process) {
1603 		struct user64_msghdr_x msghdrx64;
1604 
1605 		error = copyin((user_addr_t)user_msghdr_x_src,
1606 		    &msghdrx64, sizeof(msghdrx64));
1607 		if (error != 0) {
1608 			DBG_PRINTF("%s copyin() msghdrx64 failed %d",
1609 			    __func__, error);
1610 			goto done;
1611 		}
1612 		user_msg->msg_name = msghdrx64.msg_name;
1613 		user_msg->msg_namelen = msghdrx64.msg_namelen;
1614 		user_msg->msg_iov = msghdrx64.msg_iov;
1615 		user_msg->msg_iovlen = msghdrx64.msg_iovlen;
1616 		user_msg->msg_control = msghdrx64.msg_control;
1617 		user_msg->msg_controllen = msghdrx64.msg_controllen;
1618 	} else {
1619 		struct user32_msghdr_x msghdrx32;
1620 
1621 		error = copyin((user_addr_t)user_msghdr_x_src,
1622 		    &msghdrx32, sizeof(msghdrx32));
1623 		if (error != 0) {
1624 			DBG_PRINTF("%s copyin() msghdrx32 failed %d",
1625 			    __func__, error);
1626 			goto done;
1627 		}
1628 		user_msg->msg_name = msghdrx32.msg_name;
1629 		user_msg->msg_namelen = msghdrx32.msg_namelen;
1630 		user_msg->msg_iov = msghdrx32.msg_iov;
1631 		user_msg->msg_iovlen = msghdrx32.msg_iovlen;
1632 		user_msg->msg_control = msghdrx32.msg_control;
1633 		user_msg->msg_controllen = msghdrx32.msg_controllen;
1634 	}
1635 	/* msg_flags is ignored for send */
1636 	user_msg->msg_flags = 0;
1637 
1638 	if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
1639 		error = EMSGSIZE;
1640 		DBG_PRINTF("%s bad msg_iovlen, error %d",
1641 		    __func__, error);
1642 		goto done;
1643 	}
1644 	/*
1645 	 * Attempt to reuse the uio if large enough, otherwise we need
1646 	 * a new one
1647 	 */
1648 	if (auio != NULL) {
1649 		if (auio->uio_max_iovs >= user_msg->msg_iovlen) {
1650 			uio_reset_fast(auio, 0,
1651 			    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1652 			    UIO_WRITE);
1653 		} else {
1654 			uio_free(auio);
1655 			auio = NULL;
1656 		}
1657 	}
1658 	if (auio == NULL) {
1659 		auio = uio_create(user_msg->msg_iovlen, 0,
1660 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1661 		    UIO_WRITE);
1662 		if (auio == NULL) {
1663 			error = ENOBUFS;
1664 			DBG_PRINTF("%s uio_create() failed %d",
1665 			    __func__, error);
1666 			goto done;
1667 		}
1668 	}
1669 
1670 	if (user_msg->msg_iovlen) {
1671 		/*
1672 		 * get location of iovecs within the uio.
1673 		 * then copyin the iovecs from user space.
1674 		 */
1675 		struct user_iovec *iovp = uio_iovsaddr_user(auio);
1676 		if (iovp == NULL) {
1677 			error = ENOBUFS;
1678 			goto done;
1679 		}
1680 		error = copyin_user_iovec_array(user_msg->msg_iov,
1681 		    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1682 		    user_msg->msg_iovlen, iovp);
1683 		if (error != 0) {
1684 			goto done;
1685 		}
1686 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
1687 
1688 		/* finish setup of uio_t */
1689 		error = uio_calculateresid_user(auio);
1690 		if (error) {
1691 			goto done;
1692 		}
1693 	} else {
1694 		user_msg->msg_iov = 0;
1695 	}
1696 
1697 done:
1698 	*auiop = auio;
1699 	return error;
1700 }
1701 
1702 static int
mbuf_packet_from_uio(socket_ref_t so,mbuf_ref_ref_t mp,uio_t auio)1703 mbuf_packet_from_uio(socket_ref_t so, mbuf_ref_ref_t mp, uio_t auio)
1704 {
1705 	int error = 0;
1706 	uint16_t headroom = 0;
1707 	size_t bytes_to_alloc;
1708 	mbuf_ref_t top = NULL, m;
1709 
1710 	if (soreserveheadroom != 0) {
1711 		headroom = so->so_pktheadroom;
1712 	}
1713 	bytes_to_alloc = headroom + uio_resid(auio);
1714 
1715 	error = mbuf_allocpacket(MBUF_WAITOK, bytes_to_alloc, NULL, &top);
1716 	if (error != 0) {
1717 		os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: mbuf_allocpacket %zu error %d",
1718 		    bytes_to_alloc, error);
1719 		goto done;
1720 	}
1721 
1722 	if (headroom > 0 && headroom < mbuf_maxlen(top)) {
1723 		top->m_data += headroom;
1724 	}
1725 
1726 	for (m = top; m != NULL; m = m->m_next) {
1727 		int bytes_to_copy = (int)uio_resid(auio);
1728 		ssize_t mlen;
1729 
1730 		if ((m->m_flags & M_EXT)) {
1731 			mlen = m->m_ext.ext_size -
1732 			    M_LEADINGSPACE(m);
1733 		} else if ((m->m_flags & M_PKTHDR)) {
1734 			mlen = MHLEN - M_LEADINGSPACE(m);
1735 			m_add_crumb(m, PKT_CRUMB_SOSEND);
1736 		} else {
1737 			mlen = MLEN - M_LEADINGSPACE(m);
1738 		}
1739 		int len = imin((int)mlen, bytes_to_copy);
1740 
1741 		error = uio_copyin_user(mtod(m, caddr_t), (int)len, auio);
1742 		if (error != 0) {
1743 			os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
1744 			    len, error);
1745 			goto done;
1746 		}
1747 		m->m_len = len;
1748 		top->m_pkthdr.len += len;
1749 	}
1750 
1751 done:
1752 	if (error != 0) {
1753 		m_freem(top);
1754 	} else {
1755 		*mp = top;
1756 	}
1757 	return error;
1758 }
1759 
1760 static int
sendit_x(proc_ref_t p,socket_ref_t so,struct sendmsg_x_args * uap,u_int * retval)1761 sendit_x(proc_ref_t p, socket_ref_t so, struct sendmsg_x_args *uap, u_int *retval)
1762 {
1763 	int error = 0;
1764 	uio_t auio = NULL;
1765 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1766 	void_ptr_t src;
1767 	MBUFQ_HEAD() pktlist = {};
1768 	size_t total_pkt_len = 0;
1769 	u_int pkt_cnt = 0;
1770 	int flags = uap->flags;
1771 	mbuf_ref_t top;
1772 
1773 	MBUFQ_INIT(&pktlist);
1774 
1775 	*retval = 0;
1776 
1777 	/* We re-use the uio when possible */
1778 	auio = uio_create(1, 0,
1779 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1780 	    UIO_WRITE);
1781 	if (auio == NULL) {
1782 		error = ENOBUFS;
1783 		DBG_PRINTF("%s uio_create() failed %d",
1784 		    __func__, error);
1785 		goto done;
1786 	}
1787 
1788 	src = (void_ptr_t)uap->msgp;
1789 
1790 	/*
1791 	 * Create a list of packets
1792 	 */
1793 	for (u_int i = 0; i < uap->cnt; i++) {
1794 		struct user_msghdr user_msg = {};
1795 		mbuf_ref_t m = NULL;
1796 
1797 		if (is_p_64bit_process) {
1798 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
1799 			if (error != 0) {
1800 				os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1801 				goto done;
1802 			}
1803 		} else {
1804 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
1805 			if (error != 0) {
1806 				os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1807 				goto done;
1808 			}
1809 		}
1810 		/*
1811 		 * Stop on the first datagram that is too large
1812 		 */
1813 		if (uio_resid(auio) > so->so_snd.sb_hiwat) {
1814 			if (i == 0) {
1815 				error = EMSGSIZE;
1816 				goto done;
1817 			}
1818 			break;
1819 		}
1820 		/*
1821 		 * An mbuf packet has the control mbuf(s) followed by data
1822 		 * We allocate the mbufs in reverse order
1823 		 */
1824 		error = mbuf_packet_from_uio(so, &m, auio);
1825 		if (error != 0) {
1826 			os_log(OS_LOG_DEFAULT, "sendit_x: mbuf_packet_from_uio error %d\n", error);
1827 			goto done;
1828 		}
1829 		total_pkt_len += m->m_pkthdr.len;
1830 
1831 		if (user_msg.msg_control != USER_ADDR_NULL && user_msg.msg_controllen != 0) {
1832 			mbuf_ref_t control = NULL;
1833 
1834 			error = sockargs(&control, user_msg.msg_control, user_msg.msg_controllen, MT_CONTROL);
1835 			if (error != 0) {
1836 				os_log(OS_LOG_DEFAULT, "sendit_x: sockargs error %d\n", error);
1837 				goto done;
1838 			}
1839 			control->m_next = m;
1840 			m = control;
1841 		}
1842 		MBUFQ_ENQUEUE(&pktlist, m);
1843 
1844 		pkt_cnt += 1;
1845 	}
1846 
1847 	top = MBUFQ_FIRST(&pktlist);
1848 	MBUFQ_INIT(&pktlist);
1849 	error = sosend_list(so, top, total_pkt_len, &pkt_cnt, flags);
1850 	if (error != 0) {
1851 		os_log(OS_LOG_DEFAULT, "sendit_x: sosend_list error %d\n", error);
1852 		goto done;
1853 	}
1854 done:
1855 	*retval = pkt_cnt;
1856 
1857 	if (auio != NULL) {
1858 		uio_free(auio);
1859 	}
1860 	MBUFQ_DRAIN(&pktlist);
1861 	return error;
1862 }
1863 
1864 int
sendmsg_x(proc_ref_t p,struct sendmsg_x_args * uap,user_ssize_t * retval)1865 sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1866 {
1867 	void_ptr_t src;
1868 	int error;
1869 	uio_t auio = NULL;
1870 	socket_ref_t so;
1871 	u_int uiocnt = 0;
1872 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1873 
1874 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1875 	AUDIT_ARG(fd, uap->s);
1876 
1877 	if (uap->flags & MSG_SKIPCFIL) {
1878 		error = EPERM;
1879 		goto done_no_filedrop;
1880 	}
1881 
1882 	error = file_socket(uap->s, &so);
1883 	if (error) {
1884 		goto done_no_filedrop;
1885 	}
1886 	if (so == NULL) {
1887 		error = EBADF;
1888 		goto done;
1889 	}
1890 
1891 	/*
1892 	 * For an atomic datagram connected socket we can build the list of
1893 	 * mbuf packets with sosend_list()
1894 	 */
1895 	if (so->so_type == SOCK_DGRAM && sosendallatonce(so) &&
1896 	    (so->so_state & SS_ISCONNECTED) && sendmsg_x_mode != 1) {
1897 		error = sendit_x(p, so, uap, &uiocnt);
1898 		if (error != 0) {
1899 			DBG_PRINTF("%s sendit_x() failed %d",
1900 			    __func__, error);
1901 		}
1902 		goto done;
1903 	}
1904 
1905 	src = (void_ptr_t)uap->msgp;
1906 
1907 	/* We re-use the uio when possible */
1908 	auio = uio_create(1, 0,
1909 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1910 	    UIO_WRITE);
1911 	if (auio == NULL) {
1912 		error = ENOBUFS;
1913 		DBG_PRINTF("%s uio_create() failed %d",
1914 		    __func__, error);
1915 		goto done;
1916 	}
1917 
1918 	for (u_int i = 0; i < uap->cnt; i++) {
1919 		struct user_msghdr user_msg = {};
1920 
1921 		if (is_p_64bit_process) {
1922 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
1923 			if (error != 0) {
1924 				goto done;
1925 			}
1926 		} else {
1927 			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
1928 			if (error != 0) {
1929 				goto done;
1930 			}
1931 		}
1932 
1933 		int32_t len = 0;
1934 		error = sendit(p, so, &user_msg, auio, uap->flags, &len);
1935 		if (error != 0) {
1936 			break;
1937 		}
1938 		uiocnt += 1;
1939 	}
1940 done:
1941 	if (error != 0) {
1942 		if (uiocnt != 0 && (error == ERESTART ||
1943 		    error == EINTR || error == EWOULDBLOCK ||
1944 		    error == ENOBUFS || error == EMSGSIZE)) {
1945 			error = 0;
1946 		}
1947 		/* Generation of SIGPIPE can be controlled per socket */
1948 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1949 		    !(uap->flags & MSG_NOSIGNAL)) {
1950 			psignal(p, SIGPIPE);
1951 		}
1952 	}
1953 	if (error == 0) {
1954 		*retval = (int)(uiocnt);
1955 	}
1956 	file_drop(uap->s);
1957 
1958 done_no_filedrop:
1959 	if (auio != NULL) {
1960 		uio_free(auio);
1961 	}
1962 	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1963 
1964 	return error;
1965 }
1966 
1967 
1968 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)1969 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
1970 {
1971 	int error = 0;
1972 	socklen_t sa_len = 0;
1973 	ssize_t len;
1974 
1975 	len = *namelen;
1976 	if (len <= 0 || fromsa == 0) {
1977 		len = 0;
1978 	} else {
1979 #ifndef MIN
1980 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1981 #endif
1982 		sa_len = fromsa->sa_len;
1983 		len = MIN((unsigned int)len, sa_len);
1984 		error = copyout(fromsa, name, (unsigned)len);
1985 		if (error) {
1986 			goto out;
1987 		}
1988 	}
1989 	*namelen = sa_len;
1990 out:
1991 	return 0;
1992 }
1993 
1994 static int
copyout_maddr(struct mbuf * m,user_addr_t name,socklen_t * namelen)1995 copyout_maddr(struct mbuf *m, user_addr_t name, socklen_t *namelen)
1996 {
1997 	int error = 0;
1998 	socklen_t sa_len = 0;
1999 	ssize_t len;
2000 
2001 	len = *namelen;
2002 	if (len <= 0 || m == NULL) {
2003 		len = 0;
2004 	} else {
2005 #ifndef MIN
2006 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2007 #endif
2008 		struct sockaddr *fromsa = mtod(m, struct sockaddr *);
2009 
2010 		sa_len = fromsa->sa_len;
2011 		len = MIN((unsigned int)len, sa_len);
2012 		error = copyout(fromsa, name, (unsigned)len);
2013 		if (error != 0) {
2014 			goto out;
2015 		}
2016 	}
2017 	*namelen = sa_len;
2018 out:
2019 	return 0;
2020 }
2021 
2022 static int
copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
    socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
{
	/*
	 * Copy the control messages (cmsgs) carried by the mbuf chain `m' to
	 * the user buffer at `control'.
	 *
	 * On entry *controllen is the size of the user buffer.  It is reset
	 * to 0 up front and, when every copyout succeeds, rewritten with the
	 * number of bytes actually copied.  MSG_CTRUNC is or'ed into *flags
	 * whenever a cmsg does not fit completely in the remaining space.
	 *
	 * Returns:	0			Success
	 *	copyout:???			First copyout failure
	 */
	int error = 0;
	socklen_t len;
	user_addr_t ctlbuf;
	struct inpcb *inp = NULL;
	bool want_pktinfo = false;
	bool seen_pktinfo = false;

	if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
		inp = sotoinpcb(so);
		/* The rest of this function already tolerates a NULL inp */
		if (inp != NULL) {
			want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
		}
	}

	len = *controllen;
	*controllen = 0;
	ctlbuf = control;

	while (m && len > 0) {
		socklen_t tocopy;
		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
		socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
		socklen_t buflen = m->m_len;

		while (buflen > 0 && len > 0) {
			/*
			 * SCM_TIMESTAMP hack because  struct timeval has a
			 * different size for 32 bits and 64 bits processes
			 */
			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
				socklen_t tmp_space;
				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);

				tmp_cp->cmsg_level = SOL_SOCKET;
				tmp_cp->cmsg_type = SCM_TIMESTAMP;

				if (proc_is64bit(p)) {
					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);

					os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
					os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;

					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
				} else {
					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);

					tv32->tv_sec = (user32_time_t)tv->tv_sec;
					tv32->tv_usec = tv->tv_usec;

					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
				}
				if (len >= tmp_space) {
					tocopy = tmp_space;
				} else {
					*flags |= MSG_CTRUNC;
					tocopy = len;
				}
				error = copyout(tmp_buffer, ctlbuf, tocopy);
				if (error) {
					goto out;
				}
			} else {
				/* If socket has flow tracking and socket did not request address, ignore it */
				if (SOFLOW_ENABLED(so) &&
				    ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
				    !(inp->inp_flags & INP_RECVDSTADDR)) ||
				    (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
				    !(inp->inp_flags & IN6P_PKTINFO)))) {
					tocopy = 0;
				} else {
					if (cp_size > buflen) {
						panic("cp_size > buflen, something wrong with alignment!");
					}
					if (len >= cp_size) {
						tocopy = cp_size;
					} else {
						*flags |= MSG_CTRUNC;
						tocopy = len;
					}
					error = copyout((caddr_t) cp, ctlbuf, tocopy);
					if (error) {
						goto out;
					}
					if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
					    (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
						seen_pktinfo = true;
					}
				}
			}

			ctlbuf += tocopy;
			len -= tocopy;

			buflen -= cp_size;
			cp = (struct cmsghdr *)(void *)
			    ((unsigned char *) cp + cp_size);
			/*
			 * Only peek at the next header while this mbuf still
			 * has bytes left: once buflen reaches 0, cp points
			 * past the data and must not be dereferenced.
			 */
			if (buflen > 0) {
				cp_size = CMSG_ALIGN(cp->cmsg_len);
			}
		}

		m = m->m_next;
	}
	*controllen = (socklen_t)(ctlbuf - control);
out:
	/* The socket asked for packet info but none was delivered: count it */
	if (want_pktinfo && !seen_pktinfo) {
		missingpktinfo += 1;
#if (DEBUG || DEVELOPMENT)
		char pname[MAXCOMLEN + 1];
		char local[MAX_IPv6_STR_LEN + 6];
		char remote[MAX_IPv6_STR_LEN + 6];

		/* Pass the buffer size, not the size of the MAXCOMLEN constant */
		proc_name(so->last_pid, pname, sizeof(pname));
		if (inp->inp_vflag & INP_IPV6) {
			inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
			inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(remote));
		} else {
			inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
			inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(remote));
		}

		os_log(OS_LOG_DEFAULT,
		    "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
		    local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
		    pname, so->last_pid, error);
#endif /* (DEBUG || DEVELOPMENT) */
	}
	return error;
}
2156 
2157 /*
2158  * Returns:	0			Success
2159  *		ENOTSOCK
2160  *		EINVAL
2161  *		EBADF
2162  *		EACCES			Mandatory Access Control failure
2163  *	copyout:EFAULT
2164  *	fp_lookup:EBADF
2165  *	<pru_soreceive>:ENOBUFS
2166  *	<pru_soreceive>:ENOTCONN
2167  *	<pru_soreceive>:EWOULDBLOCK
2168  *	<pru_soreceive>:EFAULT
2169  *	<pru_soreceive>:EINTR
2170  *	<pru_soreceive>:EBADF
2171  *	<pru_soreceive>:EINVAL
2172  *	<pru_soreceive>:EMSGSIZE
2173  *	<pru_soreceive>:???
2174  *
2175  * Notes:	Additional return values from calls through <pru_soreceive>
2176  *		depend on protocols other than TCP or AF_UNIX, which are
2177  *		documented above.
2178  */
2179 static int
recvit(proc_ref_t p,int s,user_msghdr_ref_t mp,uio_t uiop,user_addr_t namelenp,int32_ref_t retval)2180 recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
2181     user_addr_t namelenp, int32_ref_t retval)
2182 {
2183 	ssize_t len;
2184 	int error;
2185 	mbuf_ref_t  control = 0;
2186 	socket_ref_t so;
2187 	sockaddr_ref_t  fromsa = 0;
2188 	fileproc_ref_t  fp;
2189 
2190 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2191 	if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
2192 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2193 		return error;
2194 	}
2195 	so = (struct socket *)fp_get_data(fp);
2196 
2197 #if CONFIG_MACF_SOCKET_SUBSET
2198 	/*
2199 	 * We check the state without holding the socket lock;
2200 	 * if a race condition occurs, it would simply result
2201 	 * in an extra call to the MAC check function.
2202 	 */
2203 	if (!(so->so_state & SS_DEFUNCT) &&
2204 	    !(so->so_state & SS_ISCONNECTED) &&
2205 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2206 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2207 		goto out1;
2208 	}
2209 #endif /* MAC_SOCKET_SUBSET */
2210 	if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
2211 		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
2212 		error = EINVAL;
2213 		goto out1;
2214 	}
2215 
2216 	len = uio_resid(uiop);
2217 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2218 	    NULL, mp->msg_control ? &control : NULL,
2219 	    &mp->msg_flags);
2220 	if (fromsa) {
2221 		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2222 		    fromsa);
2223 	}
2224 	if (error) {
2225 		if (uio_resid(uiop) != len && (error == ERESTART ||
2226 		    error == EINTR || error == EWOULDBLOCK)) {
2227 			error = 0;
2228 		}
2229 	}
2230 	if (error) {
2231 		goto out;
2232 	}
2233 
2234 	*retval = (int32_t)(len - uio_resid(uiop));
2235 
2236 	if (mp->msg_name) {
2237 		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
2238 		if (error) {
2239 			goto out;
2240 		}
2241 		/* return the actual, untruncated address length */
2242 		if (namelenp &&
2243 		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2244 		    sizeof(int)))) {
2245 			goto out;
2246 		}
2247 	}
2248 
2249 	if (mp->msg_control) {
2250 		error = copyout_control(p, control, mp->msg_control,
2251 		    &mp->msg_controllen, &mp->msg_flags, so);
2252 	}
2253 out:
2254 	free_sockaddr(fromsa);
2255 	if (control) {
2256 		m_freem(control);
2257 	}
2258 	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2259 out1:
2260 	fp_drop(p, s, fp, 0);
2261 	return error;
2262 }
2263 
2264 /*
2265  * Returns:	0			Success
2266  *		ENOMEM
2267  *	copyin:EFAULT
2268  *	recvit:???
2269  *	read:???			[4056224: applicable for pipes]
2270  *
2271  * Notes:	The read entry point is only called as part of support for
2272  *		binary backward compatability; new code should use read
2273  *		instead of recv or recvfrom when attempting to read data
2274  *		from pipes.
2275  *
2276  *		For full documentation of the return codes from recvit, see
2277  *		the block header for the recvit function.
2278  */
2279 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2280 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2281 {
2282 	__pthread_testcancel(1);
2283 	return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2284 	           retval);
2285 }
2286 
2287 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2288 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2289     int32_ref_t retval)
2290 {
2291 	struct user_msghdr msg;
2292 	int error;
2293 	uio_t auio = NULL;
2294 
2295 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2296 	AUDIT_ARG(fd, uap->s);
2297 
2298 	if (uap->fromlenaddr) {
2299 		error = copyin(uap->fromlenaddr,
2300 		    (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2301 		if (error) {
2302 			return error;
2303 		}
2304 	} else {
2305 		msg.msg_namelen = 0;
2306 	}
2307 	msg.msg_name = uap->from;
2308 	auio = uio_create(1, 0,
2309 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2310 	    UIO_READ);
2311 	if (auio == NULL) {
2312 		return ENOMEM;
2313 	}
2314 
2315 	uio_addiov(auio, uap->buf, uap->len);
2316 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
2317 	msg.msg_iov = 0;
2318 	msg.msg_iovlen = 0;
2319 	msg.msg_control = 0;
2320 	msg.msg_controllen = 0;
2321 	msg.msg_flags = uap->flags;
2322 	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2323 	if (auio != NULL) {
2324 		uio_free(auio);
2325 	}
2326 
2327 	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2328 
2329 	return error;
2330 }
2331 
2332 /*
2333  * Returns:	0			Success
2334  *		EMSGSIZE
2335  *		ENOMEM
2336  *	copyin:EFAULT
2337  *	copyout:EFAULT
2338  *	recvit:???
2339  *
2340  * Notes:	For full documentation of the return codes from recvit, see
2341  *		the block header for the recvit function.
2342  */
2343 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2344 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2345 {
2346 	__pthread_testcancel(1);
2347 	return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2348 	           retval);
2349 }
2350 
2351 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2352 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2353     int32_ref_t retval)
2354 {
2355 	struct user32_msghdr msg32;
2356 	struct user64_msghdr msg64;
2357 	struct user_msghdr user_msg;
2358 	caddr_t msghdrp;
2359 	int     size_of_msghdr;
2360 	user_addr_t uiov;
2361 	int error;
2362 	uio_t auio = NULL;
2363 	struct user_iovec *iovp;
2364 
2365 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2366 
2367 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2368 	AUDIT_ARG(fd, uap->s);
2369 	if (is_p_64bit_process) {
2370 		msghdrp = (caddr_t)&msg64;
2371 		size_of_msghdr = sizeof(msg64);
2372 	} else {
2373 		msghdrp = (caddr_t)&msg32;
2374 		size_of_msghdr = sizeof(msg32);
2375 	}
2376 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
2377 	if (error) {
2378 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2379 		return error;
2380 	}
2381 
2382 	/* only need to copy if user process is not 64-bit */
2383 	if (is_p_64bit_process) {
2384 		user_msg.msg_flags = msg64.msg_flags;
2385 		user_msg.msg_controllen = msg64.msg_controllen;
2386 		user_msg.msg_control = (user_addr_t)msg64.msg_control;
2387 		user_msg.msg_iovlen = msg64.msg_iovlen;
2388 		user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2389 		user_msg.msg_namelen = msg64.msg_namelen;
2390 		user_msg.msg_name = (user_addr_t)msg64.msg_name;
2391 	} else {
2392 		user_msg.msg_flags = msg32.msg_flags;
2393 		user_msg.msg_controllen = msg32.msg_controllen;
2394 		user_msg.msg_control = msg32.msg_control;
2395 		user_msg.msg_iovlen = msg32.msg_iovlen;
2396 		user_msg.msg_iov = msg32.msg_iov;
2397 		user_msg.msg_namelen = msg32.msg_namelen;
2398 		user_msg.msg_name = msg32.msg_name;
2399 	}
2400 
2401 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2402 		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2403 		    0, 0, 0, 0);
2404 		return EMSGSIZE;
2405 	}
2406 
2407 	user_msg.msg_flags = uap->flags;
2408 
2409 	/* allocate a uio large enough to hold the number of iovecs passed */
2410 	auio = uio_create(user_msg.msg_iovlen, 0,
2411 	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2412 	    UIO_READ);
2413 	if (auio == NULL) {
2414 		error = ENOMEM;
2415 		goto done;
2416 	}
2417 
2418 	/*
2419 	 * get location of iovecs within the uio.  then copyin the iovecs from
2420 	 * user space.
2421 	 */
2422 	iovp = uio_iovsaddr_user(auio);
2423 	if (iovp == NULL) {
2424 		error = ENOMEM;
2425 		goto done;
2426 	}
2427 	uiov = user_msg.msg_iov;
2428 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2429 	error = copyin_user_iovec_array(uiov,
2430 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2431 	    user_msg.msg_iovlen, iovp);
2432 	if (error) {
2433 		goto done;
2434 	}
2435 
2436 	/* finish setup of uio_t */
2437 	error = uio_calculateresid_user(auio);
2438 	if (error) {
2439 		goto done;
2440 	}
2441 
2442 	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2443 	if (!error) {
2444 		user_msg.msg_iov = uiov;
2445 		if (is_p_64bit_process) {
2446 			msg64.msg_flags = user_msg.msg_flags;
2447 			msg64.msg_controllen = user_msg.msg_controllen;
2448 			msg64.msg_control = user_msg.msg_control;
2449 			msg64.msg_iovlen = user_msg.msg_iovlen;
2450 			msg64.msg_iov = user_msg.msg_iov;
2451 			msg64.msg_namelen = user_msg.msg_namelen;
2452 			msg64.msg_name = user_msg.msg_name;
2453 		} else {
2454 			msg32.msg_flags = user_msg.msg_flags;
2455 			msg32.msg_controllen = user_msg.msg_controllen;
2456 			msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2457 			msg32.msg_iovlen = user_msg.msg_iovlen;
2458 			msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2459 			msg32.msg_namelen = user_msg.msg_namelen;
2460 			msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2461 		}
2462 		error = copyout(msghdrp, uap->msg, size_of_msghdr);
2463 	}
2464 done:
2465 	if (auio != NULL) {
2466 		uio_free(auio);
2467 	}
2468 	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2469 	return error;
2470 }
2471 
2472 __attribute__((noinline))
2473 static int
recvmsg_x_array(proc_ref_t p,socket_ref_t so,struct recvmsg_x_args * uap,user_ssize_t * retval)2474 recvmsg_x_array(proc_ref_t p, socket_ref_t so, struct recvmsg_x_args *uap, user_ssize_t *retval)
2475 {
2476 	int error = EOPNOTSUPP;
2477 	user_msghdr_x_ptr_t user_msg_x = NULL;
2478 	recv_msg_elem_ptr_t recv_msg_array = NULL;
2479 	user_ssize_t len_before = 0, len_after;
2480 	size_t size_of_msghdr;
2481 	void_ptr_t umsgp = NULL;
2482 	u_int i;
2483 	u_int uiocnt;
2484 	int flags = uap->flags;
2485 
2486 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2487 
2488 	size_of_msghdr = is_p_64bit_process ?
2489 	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2490 
2491 	/*
2492 	 * Support only a subset of message flags
2493 	 */
2494 	if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |  MSG_NBIO)) {
2495 		return EOPNOTSUPP;
2496 	}
2497 	/*
2498 	 * Input parameter range check
2499 	 */
2500 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2501 		error = EINVAL;
2502 		goto out;
2503 	}
2504 	if (uap->cnt > somaxrecvmsgx) {
2505 		uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2506 	}
2507 
2508 	user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
2509 	    Z_WAITOK | Z_ZERO);
2510 	if (user_msg_x == NULL) {
2511 		DBG_PRINTF("%s user_msg_x alloc failed", __func__);
2512 		error = ENOMEM;
2513 		goto out;
2514 	}
2515 	recv_msg_array = alloc_recv_msg_array(uap->cnt);
2516 	if (recv_msg_array == NULL) {
2517 		DBG_PRINTF("%s alloc_recv_msg_array() failed", __func__);
2518 		error = ENOMEM;
2519 		goto out;
2520 	}
2521 
2522 	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2523 	if (umsgp == NULL) {
2524 		DBG_PRINTF("%s umsgp alloc failed", __func__);
2525 		error = ENOMEM;
2526 		goto out;
2527 	}
2528 	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2529 	if (error) {
2530 		DBG_PRINTF("%s copyin() failed", __func__);
2531 		goto out;
2532 	}
2533 	error = internalize_recv_msghdr_array(umsgp,
2534 	    is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2535 	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2536 	if (error) {
2537 		DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
2538 		goto out;
2539 	}
2540 	/*
2541 	 * Make sure the size of each message iovec and
2542 	 * the aggregate size of all the iovec is valid
2543 	 */
2544 	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2545 		error = EINVAL;
2546 		goto out;
2547 	}
2548 	/*
2549 	 * Sanity check on passed arguments
2550 	 */
2551 	for (i = 0; i < uap->cnt; i++) {
2552 		struct user_msghdr_x *mp = user_msg_x + i;
2553 
2554 		if (mp->msg_flags != 0) {
2555 			error = EINVAL;
2556 			goto out;
2557 		}
2558 	}
2559 #if CONFIG_MACF_SOCKET_SUBSET
2560 	/*
2561 	 * We check the state without holding the socket lock;
2562 	 * if a race condition occurs, it would simply result
2563 	 * in an extra call to the MAC check function.
2564 	 */
2565 	if (!(so->so_state & SS_DEFUNCT) &&
2566 	    !(so->so_state & SS_ISCONNECTED) &&
2567 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2568 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2569 		goto out;
2570 	}
2571 #endif /* MAC_SOCKET_SUBSET */
2572 
2573 	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2574 
2575 	for (i = 0; i < uap->cnt; i++) {
2576 		struct recv_msg_elem *recv_msg_elem;
2577 		uio_t auio;
2578 		sockaddr_ref_ref_t psa;
2579 		struct mbuf **controlp;
2580 
2581 		recv_msg_elem = recv_msg_array + i;
2582 		auio = recv_msg_elem->uio;
2583 
2584 		/*
2585 		 * Do not block if we got at least one packet
2586 		 */
2587 		if (i > 0) {
2588 			flags |= MSG_DONTWAIT;
2589 		}
2590 
2591 		psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2592 		    &recv_msg_elem->psa : NULL;
2593 		controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2594 		    &recv_msg_elem->controlp : NULL;
2595 
2596 		error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2597 		    auio, NULL, controlp, &flags);
2598 		if (error) {
2599 			break;
2600 		}
2601 		/*
2602 		 * We have some data
2603 		 */
2604 		recv_msg_elem->which |= SOCK_MSG_DATA;
2605 		/*
2606 		 * Set the messages flags for this packet
2607 		 */
2608 		flags &= ~MSG_DONTWAIT;
2609 		recv_msg_elem->flags = flags;
2610 		/*
2611 		 * Stop on partial copy
2612 		 */
2613 		if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2614 			break;
2615 		}
2616 	}
2617 
2618 	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2619 
2620 	if (error) {
2621 		if (len_after != len_before && (error == ERESTART ||
2622 		    error == EINTR || error == EWOULDBLOCK)) {
2623 			error = 0;
2624 		} else {
2625 			goto out;
2626 		}
2627 	}
2628 
2629 	uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2630 	    uap->cnt, user_msg_x, recv_msg_array, &error);
2631 	if (error != 0) {
2632 		goto out;
2633 	}
2634 
2635 	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2636 	if (error) {
2637 		DBG_PRINTF("%s copyout() failed", __func__);
2638 		goto out;
2639 	}
2640 	*retval = (int)(uiocnt);
2641 
2642 out:
2643 	kfree_data(umsgp, uap->cnt * size_of_msghdr);
2644 	free_recv_msg_array(recv_msg_array, uap->cnt);
2645 	kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);
2646 
2647 	return error;
2648 }
2649 
2650 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2651 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2652 {
2653 	int error = EOPNOTSUPP;
2654 	socket_ref_t so;
2655 	size_t size_of_msghdrx;
2656 	caddr_t msghdrxp;
2657 	struct user32_msghdr_x msghdrx32 = {};
2658 	struct user64_msghdr_x msghdrx64 = {};
2659 	int spacetype;
2660 	u_int i;
2661 	uio_t auio = NULL;
2662 	caddr_t src;
2663 	int flags;
2664 	struct mbuf *pkt_list = NULL, *m;
2665 	struct mbuf *addr_list = NULL, *m_addr;
2666 	struct mbuf *ctl_list = NULL, *control;
2667 	u_int pktcnt;
2668 
2669 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2670 
2671 	error = file_socket(uap->s, &so);
2672 	if (error) {
2673 		goto done_no_filedrop;
2674 	}
2675 	if (so == NULL) {
2676 		error = EBADF;
2677 		goto done;
2678 	}
2679 
2680 #if CONFIG_MACF_SOCKET_SUBSET
2681 	/*
2682 	 * We check the state without holding the socket lock;
2683 	 * if a race condition occurs, it would simply result
2684 	 * in an extra call to the MAC check function.
2685 	 */
2686 	if (!(so->so_state & SS_DEFUNCT) &&
2687 	    !(so->so_state & SS_ISCONNECTED) &&
2688 	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2689 	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2690 		goto done;
2691 	}
2692 #endif /* MAC_SOCKET_SUBSET */
2693 
2694 	/*
2695 	 * With soreceive_m_list, all packets must be uniform, with address and
2696 	 * control as they are returned in parallel lists and it's only guaranteed
2697 	 * when pru_send_list is supported
2698 	 */
2699 	if (do_recvmsg_x_donttrunc != 0 || (so->so_options & SO_DONTTRUNC)) {
2700 		error = recvmsg_x_array(p, so, uap, retval);
2701 		goto done;
2702 	}
2703 
2704 	/*
2705 	 * Input parameter range check
2706 	 */
2707 	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2708 		error = EINVAL;
2709 		goto done;
2710 	}
2711 	if (uap->cnt > somaxrecvmsgx) {
2712 		uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2713 	}
2714 
2715 	if (IS_64BIT_PROCESS(p)) {
2716 		msghdrxp = (caddr_t)&msghdrx64;
2717 		size_of_msghdrx = sizeof(struct user64_msghdr_x);
2718 		spacetype = UIO_USERSPACE64;
2719 	} else {
2720 		msghdrxp = (caddr_t)&msghdrx32;
2721 		size_of_msghdrx = sizeof(struct user32_msghdr_x);
2722 		spacetype = UIO_USERSPACE32;
2723 	}
2724 	src = (caddr_t)uap->msgp;
2725 
2726 	flags = uap->flags;
2727 
2728 	/*
2729 	 * Only allow MSG_DONTWAIT
2730 	 */
2731 	if ((flags & ~(MSG_DONTWAIT | MSG_NBIO)) != 0) {
2732 		error = EINVAL;
2733 		goto done;
2734 	}
2735 
2736 	/*
2737 	 * Receive list of packet in a single call
2738 	 */
2739 	pktcnt = uap->cnt;
2740 	error = soreceive_m_list(so, &pktcnt, &addr_list, &pkt_list, &ctl_list,
2741 	    &flags);
2742 	if (error != 0) {
2743 		if (pktcnt != 0 && (error == ERESTART ||
2744 		    error == EINTR || error == EWOULDBLOCK)) {
2745 			error = 0;
2746 		} else {
2747 			goto done;
2748 		}
2749 	}
2750 
2751 	m_addr = addr_list;
2752 	m = pkt_list;
2753 	control = ctl_list;
2754 
2755 	for (i = 0; i < pktcnt; i++) {
2756 		struct user_msghdr user_msg;
2757 		ssize_t len;
2758 		struct user_iovec *iovp;
2759 		struct mbuf *n;
2760 
2761 		if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
2762 			panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
2763 		}
2764 
2765 		error = copyin((user_addr_t)(src + i * size_of_msghdrx),
2766 		    msghdrxp, size_of_msghdrx);
2767 		if (error) {
2768 			DBG_PRINTF("%s copyin() msghdrx failed %d\n",
2769 			    __func__, error);
2770 			goto done;
2771 		}
2772 		if (spacetype == UIO_USERSPACE64) {
2773 			user_msg.msg_name = msghdrx64.msg_name;
2774 			user_msg.msg_namelen = msghdrx64.msg_namelen;
2775 			user_msg.msg_iov = msghdrx64.msg_iov;
2776 			user_msg.msg_iovlen = msghdrx64.msg_iovlen;
2777 			user_msg.msg_control = msghdrx64.msg_control;
2778 			user_msg.msg_controllen = msghdrx64.msg_controllen;
2779 		} else {
2780 			user_msg.msg_name = msghdrx32.msg_name;
2781 			user_msg.msg_namelen = msghdrx32.msg_namelen;
2782 			user_msg.msg_iov = msghdrx32.msg_iov;
2783 			user_msg.msg_iovlen = msghdrx32.msg_iovlen;
2784 			user_msg.msg_control = msghdrx32.msg_control;
2785 			user_msg.msg_controllen = msghdrx32.msg_controllen;
2786 		}
2787 		user_msg.msg_flags = 0;
2788 		if (user_msg.msg_iovlen <= 0 ||
2789 		    user_msg.msg_iovlen > UIO_MAXIOV) {
2790 			error = EMSGSIZE;
2791 			DBG_PRINTF("%s bad msg_iovlen, error %d\n",
2792 			    __func__, error);
2793 			goto done;
2794 		}
2795 		/*
2796 		 * Attempt to reuse the uio if large enough, otherwise we need
2797 		 * a new one
2798 		 */
2799 		if (auio != NULL) {
2800 			if (auio->uio_max_iovs <= user_msg.msg_iovlen) {
2801 				uio_reset_fast(auio, 0, spacetype, UIO_READ);
2802 			} else {
2803 				uio_free(auio);
2804 				auio = NULL;
2805 			}
2806 		}
2807 		if (auio == NULL) {
2808 			auio = uio_create(user_msg.msg_iovlen, 0, spacetype,
2809 			    UIO_READ);
2810 			if (auio == NULL) {
2811 				error = ENOBUFS;
2812 				DBG_PRINTF("%s uio_create() failed %d\n",
2813 				    __func__, error);
2814 				goto done;
2815 			}
2816 		}
2817 		/*
2818 		 * get location of iovecs within the uio then copy the iovecs
2819 		 * from user space.
2820 		 */
2821 		iovp = uio_iovsaddr_user(auio);
2822 		if (iovp == NULL) {
2823 			error = ENOMEM;
2824 			DBG_PRINTF("%s uio_iovsaddr() failed %d\n",
2825 			    __func__, error);
2826 			goto done;
2827 		}
2828 		error = copyin_user_iovec_array(user_msg.msg_iov,
2829 		    spacetype, user_msg.msg_iovlen, iovp);
2830 		if (error != 0) {
2831 			DBG_PRINTF("%s copyin_user_iovec_array() failed %d\n",
2832 			    __func__, error);
2833 			goto done;
2834 		}
2835 		error = uio_calculateresid_user(auio);
2836 		if (error != 0) {
2837 			DBG_PRINTF("%s uio_calculateresid() failed %d\n",
2838 			    __func__, error);
2839 			goto done;
2840 		}
2841 		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2842 
2843 		len = uio_resid(auio);
2844 		for (n = m; n != NULL; n = n->m_next) {
2845 			user_ssize_t resid = uio_resid(auio);
2846 			if (resid < n->m_len) {
2847 				error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2848 				if (error != 0) {
2849 					DBG_PRINTF("%s uiomove() failed\n",
2850 					    __func__);
2851 					goto done;
2852 				}
2853 				flags |= MSG_TRUNC;
2854 				break;
2855 			}
2856 
2857 			error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2858 			if (error != 0) {
2859 				DBG_PRINTF("%s uiomove() failed\n",
2860 				    __func__);
2861 				goto done;
2862 			}
2863 		}
2864 		len -= uio_resid(auio);
2865 
2866 		if (user_msg.msg_name != 0 && user_msg.msg_namelen != 0) {
2867 			error = copyout_maddr(m_addr, user_msg.msg_name,
2868 			    &user_msg.msg_namelen);
2869 			if (error) {
2870 				DBG_PRINTF("%s copyout_maddr()  failed\n",
2871 				    __func__);
2872 				goto done;
2873 			}
2874 		}
2875 		if (user_msg.msg_control != 0 && user_msg.msg_controllen != 0) {
2876 			error = copyout_control(p, control,
2877 			    user_msg.msg_control, &user_msg.msg_controllen,
2878 			    &user_msg.msg_flags, so);
2879 			if (error) {
2880 				DBG_PRINTF("%s copyout_control() failed\n",
2881 				    __func__);
2882 				goto done;
2883 			}
2884 		}
2885 		/*
2886 		 * Note: the original msg_iovlen and msg_iov do not change
2887 		 */
2888 		if (spacetype == UIO_USERSPACE64) {
2889 			msghdrx64.msg_flags = user_msg.msg_flags;
2890 			msghdrx64.msg_controllen = user_msg.msg_controllen;
2891 			msghdrx64.msg_control = user_msg.msg_control;
2892 			msghdrx64.msg_namelen = user_msg.msg_namelen;
2893 			msghdrx64.msg_name = user_msg.msg_name;
2894 			msghdrx64.msg_datalen = len;
2895 		} else {
2896 			msghdrx32.msg_flags = user_msg.msg_flags;
2897 			msghdrx32.msg_controllen = user_msg.msg_controllen;
2898 			msghdrx32.msg_control = (user32_addr_t) user_msg.msg_control;
2899 			msghdrx32.msg_name = user_msg.msg_namelen;
2900 			msghdrx32.msg_name = (user32_addr_t) user_msg.msg_name;
2901 			msghdrx32.msg_datalen = (user32_size_t) len;
2902 		}
2903 		error = copyout(msghdrxp,
2904 		    (user_addr_t)(src + i * size_of_msghdrx),
2905 		    size_of_msghdrx);
2906 		if (error) {
2907 			DBG_PRINTF("%s copyout() msghdrx failed\n", __func__);
2908 			goto done;
2909 		}
2910 
2911 		m = m->m_nextpkt;
2912 		if (control != NULL) {
2913 			control = control->m_nextpkt;
2914 		}
2915 		if (m_addr != NULL) {
2916 			m_addr = m_addr->m_nextpkt;
2917 		}
2918 	}
2919 
2920 	uap->flags = flags;
2921 
2922 	*retval = (int)i;
2923 done:
2924 	file_drop(uap->s);
2925 
2926 done_no_filedrop:
2927 	if (pkt_list != NULL) {
2928 		m_freem_list(pkt_list);
2929 	}
2930 	if (addr_list != NULL) {
2931 		m_freem_list(addr_list);
2932 	}
2933 	if (ctl_list != NULL) {
2934 		m_freem_list(ctl_list);
2935 	}
2936 	if (auio != NULL) {
2937 		uio_free(auio);
2938 	}
2939 
2940 	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2941 
2942 	return error;
2943 }
2944 
2945 /*
2946  * Returns:	0			Success
2947  *		EBADF
2948  *	file_socket:ENOTSOCK
2949  *	file_socket:EBADF
2950  *	soshutdown:EINVAL
2951  *	soshutdown:ENOTCONN
2952  *	soshutdown:EADDRNOTAVAIL[TCP]
2953  *	soshutdown:ENOBUFS[TCP]
2954  *	soshutdown:EMSGSIZE[TCP]
2955  *	soshutdown:EHOSTUNREACH[TCP]
2956  *	soshutdown:ENETUNREACH[TCP]
2957  *	soshutdown:ENETDOWN[TCP]
2958  *	soshutdown:ENOMEM[TCP]
2959  *	soshutdown:EACCES[TCP]
2960  *	soshutdown:EMSGSIZE[TCP]
2961  *	soshutdown:ENOBUFS[TCP]
2962  *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
2963  *	soshutdown:???			[other protocol families]
2964  */
2965 /* ARGSUSED */
2966 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)2967 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
2968     __unused int32_ref_t retval)
2969 {
2970 	socket_ref_t so;
2971 	int error;
2972 
2973 	AUDIT_ARG(fd, uap->s);
2974 	error = file_socket(uap->s, &so);
2975 	if (error) {
2976 		return error;
2977 	}
2978 	if (so == NULL) {
2979 		error = EBADF;
2980 		goto out;
2981 	}
2982 	error =  soshutdown((struct socket *)so, uap->how);
2983 out:
2984 	file_drop(uap->s);
2985 	return error;
2986 }
2987 
2988 /*
2989  * Returns:	0			Success
2990  *		EFAULT
2991  *		EINVAL
2992  *		EACCES			Mandatory Access Control failure
2993  *	file_socket:ENOTSOCK
2994  *	file_socket:EBADF
2995  *	sosetopt:EINVAL
2996  *	sosetopt:ENOPROTOOPT
2997  *	sosetopt:ENOBUFS
2998  *	sosetopt:EDOM
2999  *	sosetopt:EFAULT
3000  *	sosetopt:EOPNOTSUPP[AF_UNIX]
3001  *	sosetopt:???
3002  */
3003 /* ARGSUSED */
3004 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)3005 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
3006     __unused int32_ref_t retval)
3007 {
3008 	socket_ref_t so;
3009 	struct sockopt sopt;
3010 	int error;
3011 
3012 	AUDIT_ARG(fd, uap->s);
3013 	if (uap->val == 0 && uap->valsize != 0) {
3014 		return EFAULT;
3015 	}
3016 	/* No bounds checking on size (it's unsigned) */
3017 
3018 	error = file_socket(uap->s, &so);
3019 	if (error) {
3020 		return error;
3021 	}
3022 
3023 	sopt.sopt_dir = SOPT_SET;
3024 	sopt.sopt_level = uap->level;
3025 	sopt.sopt_name = uap->name;
3026 	sopt.sopt_val = uap->val;
3027 	sopt.sopt_valsize = uap->valsize;
3028 	sopt.sopt_p = p;
3029 
3030 	if (so == NULL) {
3031 		error = EINVAL;
3032 		goto out;
3033 	}
3034 #if CONFIG_MACF_SOCKET_SUBSET
3035 	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
3036 	    &sopt)) != 0) {
3037 		goto out;
3038 	}
3039 #endif /* MAC_SOCKET_SUBSET */
3040 	error = sosetoptlock(so, &sopt, 1);     /* will lock socket */
3041 out:
3042 	file_drop(uap->s);
3043 	return error;
3044 }
3045 
3046 /*
3047  * Returns:	0			Success
3048  *		EINVAL
3049  *		EBADF
3050  *		EACCES			Mandatory Access Control failure
3051  *	copyin:EFAULT
3052  *	copyout:EFAULT
3053  *	file_socket:ENOTSOCK
3054  *	file_socket:EBADF
3055  *	sogetopt:???
3056  */
3057 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)3058 getsockopt(proc_ref_t p, struct getsockopt_args  *uap,
3059     __unused int32_ref_t retval)
3060 {
3061 	int             error;
3062 	socklen_t       valsize;
3063 	struct sockopt  sopt;
3064 	socket_ref_t so;
3065 
3066 	error = file_socket(uap->s, &so);
3067 	if (error) {
3068 		return error;
3069 	}
3070 	if (uap->val) {
3071 		error = copyin(uap->avalsize, (caddr_t)&valsize,
3072 		    sizeof(valsize));
3073 		if (error) {
3074 			goto out;
3075 		}
3076 		/* No bounds checking on size (it's unsigned) */
3077 	} else {
3078 		valsize = 0;
3079 	}
3080 	sopt.sopt_dir = SOPT_GET;
3081 	sopt.sopt_level = uap->level;
3082 	sopt.sopt_name = uap->name;
3083 	sopt.sopt_val = uap->val;
3084 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
3085 	sopt.sopt_p = p;
3086 
3087 	if (so == NULL) {
3088 		error = EBADF;
3089 		goto out;
3090 	}
3091 #if CONFIG_MACF_SOCKET_SUBSET
3092 	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
3093 	    &sopt)) != 0) {
3094 		goto out;
3095 	}
3096 #endif /* MAC_SOCKET_SUBSET */
3097 	error = sogetoptlock((struct socket *)so, &sopt, 1);    /* will lock */
3098 	if (error == 0) {
3099 		valsize = (socklen_t)sopt.sopt_valsize;
3100 		error = copyout((caddr_t)&valsize, uap->avalsize,
3101 		    sizeof(valsize));
3102 	}
3103 out:
3104 	file_drop(uap->s);
3105 	return error;
3106 }
3107 
3108 
3109 /*
3110  * Get socket name.
3111  *
3112  * Returns:	0			Success
3113  *		EBADF
3114  *	file_socket:ENOTSOCK
3115  *	file_socket:EBADF
3116  *	copyin:EFAULT
3117  *	copyout:EFAULT
3118  *	<pru_sockaddr>:ENOBUFS[TCP]
3119  *	<pru_sockaddr>:ECONNRESET[TCP]
3120  *	<pru_sockaddr>:EINVAL[AF_UNIX]
3121  *	<sf_getsockname>:???
3122  */
3123 /* ARGSUSED */
3124 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)3125 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
3126     __unused int32_ref_t retval)
3127 {
3128 	socket_ref_t so;
3129 	sockaddr_ref_t  sa;
3130 	socklen_t len;
3131 	socklen_t sa_len;
3132 	int error;
3133 
3134 	error = file_socket(uap->fdes, &so);
3135 	if (error) {
3136 		return error;
3137 	}
3138 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3139 	if (error) {
3140 		goto out;
3141 	}
3142 	if (so == NULL) {
3143 		error = EBADF;
3144 		goto out;
3145 	}
3146 	sa = 0;
3147 	socket_lock(so, 1);
3148 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
3149 	if (error == 0) {
3150 		error = sflt_getsockname(so, &sa);
3151 		if (error == EJUSTRETURN) {
3152 			error = 0;
3153 		}
3154 	}
3155 	socket_unlock(so, 1);
3156 	if (error) {
3157 		goto bad;
3158 	}
3159 	if (sa == 0) {
3160 		len = 0;
3161 		goto gotnothing;
3162 	}
3163 
3164 	sa_len = sa->sa_len;
3165 	len = MIN(len, sa_len);
3166 	error = copyout((caddr_t)sa, uap->asa, len);
3167 	if (error) {
3168 		goto bad;
3169 	}
3170 	/* return the actual, untruncated address length */
3171 	len = sa_len;
3172 gotnothing:
3173 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3174 bad:
3175 	free_sockaddr(sa);
3176 out:
3177 	file_drop(uap->fdes);
3178 	return error;
3179 }
3180 
3181 /*
3182  * Get name of peer for connected socket.
3183  *
3184  * Returns:	0			Success
3185  *		EBADF
3186  *		EINVAL
3187  *		ENOTCONN
3188  *	file_socket:ENOTSOCK
3189  *	file_socket:EBADF
3190  *	copyin:EFAULT
3191  *	copyout:EFAULT
3192  *	<pru_peeraddr>:???
3193  *	<sf_getpeername>:???
3194  */
3195 /* ARGSUSED */
3196 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)3197 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
3198     __unused int32_ref_t retval)
3199 {
3200 	socket_ref_t so;
3201 	sockaddr_ref_t  sa;
3202 	socklen_t len;
3203 	socklen_t sa_len;
3204 	int error;
3205 
3206 	error = file_socket(uap->fdes, &so);
3207 	if (error) {
3208 		return error;
3209 	}
3210 	if (so == NULL) {
3211 		error = EBADF;
3212 		goto out;
3213 	}
3214 
3215 	socket_lock(so, 1);
3216 
3217 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
3218 	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
3219 		/* the socket has been shutdown, no more getpeername's */
3220 		socket_unlock(so, 1);
3221 		error = EINVAL;
3222 		goto out;
3223 	}
3224 
3225 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
3226 		socket_unlock(so, 1);
3227 		error = ENOTCONN;
3228 		goto out;
3229 	}
3230 	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3231 	if (error) {
3232 		socket_unlock(so, 1);
3233 		goto out;
3234 	}
3235 	sa = 0;
3236 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
3237 	if (error == 0) {
3238 		error = sflt_getpeername(so, &sa);
3239 		if (error == EJUSTRETURN) {
3240 			error = 0;
3241 		}
3242 	}
3243 	socket_unlock(so, 1);
3244 	if (error) {
3245 		goto bad;
3246 	}
3247 	if (sa == 0) {
3248 		len = 0;
3249 		goto gotnothing;
3250 	}
3251 	sa_len = sa->sa_len;
3252 	len = MIN(len, sa_len);
3253 	error = copyout(sa, uap->asa, len);
3254 	if (error) {
3255 		goto bad;
3256 	}
3257 	/* return the actual, untruncated address length */
3258 	len = sa_len;
3259 gotnothing:
3260 	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3261 bad:
3262 	free_sockaddr(sa);
3263 out:
3264 	file_drop(uap->fdes);
3265 	return error;
3266 }
3267 
3268 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)3269 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
3270 {
3271 	sockaddr_ref_t sa;
3272 	struct mbuf *m;
3273 	int error;
3274 	socklen_t alloc_buflen = buflen;
3275 
3276 	if (buflen > INT_MAX / 2) {
3277 		return EINVAL;
3278 	}
3279 	if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
3280 	    buflen < offsetof(struct sockaddr, sa_data[0]))) {
3281 		return EINVAL;
3282 	}
3283 	if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
3284 		return EINVAL;
3285 	}
3286 
3287 #ifdef __LP64__
3288 	/*
3289 	 * The fd's in the buffer must expand to be pointers, thus we need twice
3290 	 * as much space
3291 	 */
3292 	if (type == MT_CONTROL) {
3293 		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3294 		    sizeof(struct cmsghdr);
3295 	}
3296 #endif
3297 	if (alloc_buflen > MLEN) {
3298 		if (type == MT_SONAME && alloc_buflen <= 112) {
3299 			alloc_buflen = MLEN;    /* unix domain compat. hack */
3300 		} else if (alloc_buflen > MCLBYTES) {
3301 			return EINVAL;
3302 		}
3303 	}
3304 	m = m_get(M_WAIT, type);
3305 	if (m == NULL) {
3306 		return ENOBUFS;
3307 	}
3308 	if (alloc_buflen > MLEN) {
3309 		MCLGET(m, M_WAIT);
3310 		if ((m->m_flags & M_EXT) == 0) {
3311 			m_free(m);
3312 			return ENOBUFS;
3313 		}
3314 	}
3315 	/*
3316 	 * K64: We still copyin the original buflen because it gets expanded
3317 	 * later and we lie about the size of the mbuf because it only affects
3318 	 * unp_* functions
3319 	 */
3320 	m->m_len = buflen;
3321 	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
3322 	if (error) {
3323 		(void) m_free(m);
3324 	} else {
3325 		*mp = m;
3326 		if (type == MT_SONAME) {
3327 			VERIFY(buflen <= SOCK_MAXADDRLEN);
3328 			sa = mtod(m, sockaddr_ref_t);
3329 			sa->sa_len = (__uint8_t)buflen;
3330 		}
3331 	}
3332 	return error;
3333 }
3334 
3335 /*
3336  * Given a user_addr_t of length len, allocate and fill out a *sa.
3337  *
3338  * Returns:	0			Success
3339  *		ENAMETOOLONG		Filename too long
3340  *		EINVAL			Invalid argument
3341  *		ENOMEM			Not enough space
3342  *		copyin:EFAULT		Bad address
3343  */
3344 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3345 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
3346     size_t len, boolean_t translate_unspec)
3347 {
3348 	sockaddr_ref_t  sa;
3349 	int error;
3350 
3351 	if (len > SOCK_MAXADDRLEN) {
3352 		return ENAMETOOLONG;
3353 	}
3354 
3355 	if (len < offsetof(struct sockaddr, sa_data[0])) {
3356 		return EINVAL;
3357 	}
3358 
3359 	sa = SA(alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL));
3360 
3361 	error = copyin(uaddr, (caddr_t)sa, len);
3362 	if (error) {
3363 		free_sockaddr(sa);
3364 	} else {
3365 		/*
3366 		 * Force sa_family to AF_INET on AF_INET sockets to handle
3367 		 * legacy applications that use AF_UNSPEC (0).  On all other
3368 		 * sockets we leave it unchanged and let the lower layer
3369 		 * handle it.
3370 		 */
3371 		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
3372 		    SOCK_CHECK_DOM(so, PF_INET) &&
3373 		    len == sizeof(struct sockaddr_in)) {
3374 			sa->sa_family = AF_INET;
3375 		}
3376 		VERIFY(len <= SOCK_MAXADDRLEN);
3377 		sa = *&sa;
3378 		sa->sa_len = (__uint8_t)len;
3379 		*namp = sa;
3380 	}
3381 	return error;
3382 }
3383 
3384 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3385 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
3386     user_addr_t uaddr, size_t len, boolean_t translate_unspec)
3387 {
3388 	int error;
3389 
3390 	if (ss == NULL || uaddr == USER_ADDR_NULL ||
3391 	    len < offsetof(struct sockaddr, sa_data[0])) {
3392 		return EINVAL;
3393 	}
3394 
3395 	/*
3396 	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
3397 	 * so the check here is inclusive.
3398 	 */
3399 	if (len > sizeof(*ss)) {
3400 		return ENAMETOOLONG;
3401 	}
3402 
3403 	bzero(ss, sizeof(*ss));
3404 	error = copyin(uaddr, (caddr_t)ss, len);
3405 	if (error == 0) {
3406 		/*
3407 		 * Force sa_family to AF_INET on AF_INET sockets to handle
3408 		 * legacy applications that use AF_UNSPEC (0).  On all other
3409 		 * sockets we leave it unchanged and let the lower layer
3410 		 * handle it.
3411 		 */
3412 		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
3413 		    SOCK_CHECK_DOM(so, PF_INET) &&
3414 		    len == sizeof(struct sockaddr_in)) {
3415 			ss->ss_family = AF_INET;
3416 		}
3417 
3418 		ss->ss_len = (__uint8_t)len;
3419 	}
3420 	return error;
3421 }
3422 
3423 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3424 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3425     u_int count, user_msghdr_x_ptr_t dst,
3426     recv_msg_elem_ptr_t recv_msg_array)
3427 {
3428 	int error = 0;
3429 	u_int i;
3430 
3431 	for (i = 0; i < count; i++) {
3432 		struct user_iovec *iovp;
3433 		struct user_msghdr_x *user_msg = dst + i;
3434 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3435 
3436 		if (spacetype == UIO_USERSPACE64) {
3437 			const struct user64_msghdr_x *msghdr64;
3438 
3439 			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3440 
3441 			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3442 			user_msg->msg_namelen = msghdr64->msg_namelen;
3443 			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3444 			user_msg->msg_iovlen = msghdr64->msg_iovlen;
3445 			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3446 			user_msg->msg_controllen = msghdr64->msg_controllen;
3447 			user_msg->msg_flags = msghdr64->msg_flags;
3448 			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3449 		} else {
3450 			const struct user32_msghdr_x *msghdr32;
3451 
3452 			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3453 
3454 			user_msg->msg_name = msghdr32->msg_name;
3455 			user_msg->msg_namelen = msghdr32->msg_namelen;
3456 			user_msg->msg_iov = msghdr32->msg_iov;
3457 			user_msg->msg_iovlen = msghdr32->msg_iovlen;
3458 			user_msg->msg_control = msghdr32->msg_control;
3459 			user_msg->msg_controllen = msghdr32->msg_controllen;
3460 			user_msg->msg_flags = msghdr32->msg_flags;
3461 			user_msg->msg_datalen = msghdr32->msg_datalen;
3462 		}
3463 
3464 		if (user_msg->msg_iovlen <= 0 ||
3465 		    user_msg->msg_iovlen > UIO_MAXIOV) {
3466 			error = EMSGSIZE;
3467 			goto done;
3468 		}
3469 		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3470 		    spacetype, direction);
3471 		if (recv_msg_elem->uio == NULL) {
3472 			error = ENOMEM;
3473 			goto done;
3474 		}
3475 
3476 		iovp = uio_iovsaddr_user(recv_msg_elem->uio);
3477 		if (iovp == NULL) {
3478 			error = ENOMEM;
3479 			goto done;
3480 		}
3481 		error = copyin_user_iovec_array(user_msg->msg_iov,
3482 		    spacetype, user_msg->msg_iovlen, iovp);
3483 		if (error) {
3484 			goto done;
3485 		}
3486 		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3487 
3488 		error = uio_calculateresid_user(recv_msg_elem->uio);
3489 		if (error) {
3490 			goto done;
3491 		}
3492 		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3493 
3494 		if (user_msg->msg_name && user_msg->msg_namelen) {
3495 			recv_msg_elem->which |= SOCK_MSG_SA;
3496 		}
3497 		if (user_msg->msg_control && user_msg->msg_controllen) {
3498 			recv_msg_elem->which |= SOCK_MSG_CONTROL;
3499 		}
3500 	}
3501 done:
3502 
3503 	return error;
3504 }
3505 
3506 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3507 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3508     u_int count, user_msghdr_x_ptr_t src,
3509     recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3510 {
3511 	u_int i;
3512 	u_int retcnt = 0;
3513 	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3514 
3515 	*ret_error = 0;
3516 
3517 	for (i = 0; i < count; i++) {
3518 		struct user_msghdr_x *user_msg = src + i;
3519 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3520 		user_ssize_t len = 0;
3521 		int error;
3522 
3523 		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3524 
3525 		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3526 			retcnt++;
3527 
3528 			if (recv_msg_elem->which & SOCK_MSG_SA) {
3529 				error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3530 				    &user_msg->msg_namelen);
3531 				if (error != 0) {
3532 					*ret_error = error;
3533 					return 0;
3534 				}
3535 			}
3536 			if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3537 				error = copyout_control(p, recv_msg_elem->controlp,
3538 				    user_msg->msg_control, &user_msg->msg_controllen,
3539 				    &recv_msg_elem->flags, so);
3540 				if (error != 0) {
3541 					*ret_error = error;
3542 					return 0;
3543 				}
3544 			}
3545 		}
3546 
3547 		if (spacetype == UIO_USERSPACE64) {
3548 			struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3549 
3550 			msghdr64->msg_namelen = user_msg->msg_namelen;
3551 			msghdr64->msg_controllen = user_msg->msg_controllen;
3552 			msghdr64->msg_flags = recv_msg_elem->flags;
3553 			msghdr64->msg_datalen = len;
3554 		} else {
3555 			struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3556 
3557 			msghdr32->msg_namelen = user_msg->msg_namelen;
3558 			msghdr32->msg_controllen = user_msg->msg_controllen;
3559 			msghdr32->msg_flags = recv_msg_elem->flags;
3560 			msghdr32->msg_datalen = (user32_size_t)len;
3561 		}
3562 	}
3563 	return retcnt;
3564 }
3565 
3566 recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)3567 alloc_recv_msg_array(u_int count)
3568 {
3569 	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3570 }
3571 
3572 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3573 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3574 {
3575 	if (recv_msg_array == NULL) {
3576 		return;
3577 	}
3578 	for (uint32_t i = 0; i < count; i++) {
3579 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3580 
3581 		if (recv_msg_elem->uio != NULL) {
3582 			uio_free(recv_msg_elem->uio);
3583 		}
3584 		free_sockaddr(recv_msg_elem->psa);
3585 		if (recv_msg_elem->controlp != NULL) {
3586 			m_freem(recv_msg_elem->controlp);
3587 		}
3588 	}
3589 	kfree_type(struct recv_msg_elem, count, recv_msg_array);
3590 }
3591 
3592 
3593 /* Extern linkage requires using __counted_by instead of bptr */
3594 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3595 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3596 {
3597 	user_ssize_t len = 0;
3598 	u_int i;
3599 
3600 	for (i = 0; i < count; i++) {
3601 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3602 
3603 		if (recv_msg_elem->uio != NULL) {
3604 			len += uio_resid(recv_msg_elem->uio);
3605 		}
3606 	}
3607 	return len;
3608 }
3609 
3610 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)3611 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3612 {
3613 	user_ssize_t len = 0;
3614 	u_int i;
3615 
3616 	for (i = 0; i < count; i++) {
3617 		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3618 
3619 		if (recv_msg_elem->uio != NULL) {
3620 			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3621 
3622 			/*
3623 			 * Sanity check on the validity of the iovec:
3624 			 * no point of going over sb_max
3625 			 */
3626 			if (resid < 0 || (u_int32_t)resid > sb_max) {
3627 				return 0;
3628 			}
3629 
3630 			len += resid;
3631 			if (len < 0 || (u_int32_t)len > sb_max) {
3632 				return 0;
3633 			}
3634 		}
3635 	}
3636 	return 1;
3637 }
3638 
3639 #if SENDFILE
3640 
/* Number of iovec slots reserved for the uio that sendfile() reads into */
#define SFUIOBUFS 64

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * The byte count is evaluated as an unsigned int and must be non-zero
 * (zero would wrap around in the subtraction).
 */
#define SENDFILE_HOWMANY(n, shift) \
	((((unsigned int)(n) - 1) >> (shift)) + 1)
#define HOWMANY_16K(n)  SENDFILE_HOWMANY(n, M16KCLSHIFT)
#define HOWMANY_4K(n)   SENDFILE_HOWMANY(n, MBIGCLSHIFT)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3653 
3654 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)3655 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3656     mbuf_ref_ref_t m, boolean_t jumbocl)
3657 {
3658 	unsigned int needed;
3659 
3660 	if (pktlen == 0) {
3661 		panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3662 	}
3663 
3664 	/*
3665 	 * Try to allocate for the whole thing.  Since we want full control
3666 	 * over the buffer size and be able to accept partial result, we can't
3667 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
3668 	 */
3669 	*m = NULL;
3670 	if (pktlen > MBIGCLBYTES && jumbocl) {
3671 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3672 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3673 	}
3674 	if (*m == NULL) {
3675 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3676 		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3677 	}
3678 
3679 	/*
3680 	 * Our previous attempt(s) at allocation had failed; the system
3681 	 * may be short on mbufs, and we want to block until they are
3682 	 * available.  This time, ask just for 1 mbuf and don't return
3683 	 * until we get it.
3684 	 */
3685 	if (*m == NULL) {
3686 		needed = 1;
3687 		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3688 	}
3689 	if (*m == NULL) {
3690 		panic("%s: blocking allocation returned NULL", __func__);
3691 	}
3692 
3693 	*maxchunks = needed;
3694 }
3695 
3696 /*
3697  * sendfile(2).
3698  * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3699  *	 struct sf_hdtr *hdtr, int flags)
3700  *
3701  * Send a file specified by 'fd' and starting at 'offset' to a socket
3702  * specified by 's'. Send only '*nbytes' of the file or until EOF if
3703  * *nbytes == 0. Optionally add a header and/or trailer to the socket
3704  * output. If specified, write the total number of bytes sent into *nbytes.
3705  */
3706 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)3707 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
3708 {
3709 	fileproc_ref_t  fp;
3710 	vnode_ref_t  vp;
3711 	socket_ref_t so;
3712 	struct writev_nocancel_args nuap;
3713 	user_ssize_t writev_retval;
3714 	struct user_sf_hdtr user_hdtr;
3715 	struct user32_sf_hdtr user32_hdtr;
3716 	struct user64_sf_hdtr user64_hdtr;
3717 	off_t off, xfsize;
3718 	off_t nbytes = 0, sbytes = 0;
3719 	int error = 0;
3720 	size_t sizeof_hdtr;
3721 	off_t file_size;
3722 	struct vfs_context context = *vfs_context_current();
3723 
3724 	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3725 
3726 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3727 	    0, 0, 0, 0);
3728 
3729 	AUDIT_ARG(fd, uap->fd);
3730 	AUDIT_ARG(value32, uap->s);
3731 
3732 	/*
3733 	 * Do argument checking. Must be a regular file in, stream
3734 	 * type and connected socket out, positive offset.
3735 	 */
3736 	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3737 		goto done;
3738 	}
3739 	if ((fp->f_flag & FREAD) == 0) {
3740 		error = EBADF;
3741 		goto done1;
3742 	}
3743 	if (vnode_isreg(vp) == 0) {
3744 		error = ENOTSUP;
3745 		goto done1;
3746 	}
3747 	error = file_socket(uap->s, &so);
3748 	if (error) {
3749 		goto done1;
3750 	}
3751 	if (so == NULL) {
3752 		error = EBADF;
3753 		goto done2;
3754 	}
3755 	if (so->so_type != SOCK_STREAM) {
3756 		error = EINVAL;
3757 		goto done2;
3758 	}
3759 	if ((so->so_state & SS_ISCONNECTED) == 0) {
3760 		error = ENOTCONN;
3761 		goto done2;
3762 	}
3763 	if (uap->offset < 0) {
3764 		error = EINVAL;
3765 		goto done2;
3766 	}
3767 	if (uap->nbytes == USER_ADDR_NULL) {
3768 		error = EINVAL;
3769 		goto done2;
3770 	}
3771 	if (uap->flags != 0) {
3772 		error = EINVAL;
3773 		goto done2;
3774 	}
3775 
3776 	context.vc_ucred = fp->fp_glob->fg_cred;
3777 
3778 #if CONFIG_MACF_SOCKET_SUBSET
3779 	/* JMM - fetch connected sockaddr? */
3780 	error = mac_socket_check_send(context.vc_ucred, so, NULL);
3781 	if (error) {
3782 		goto done2;
3783 	}
3784 #endif
3785 
3786 	/*
3787 	 * Get number of bytes to send
3788 	 * Should it applies to size of header and trailer?
3789 	 */
3790 	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3791 	if (error) {
3792 		goto done2;
3793 	}
3794 
3795 	/*
3796 	 * If specified, get the pointer to the sf_hdtr struct for
3797 	 * any headers/trailers.
3798 	 */
3799 	if (uap->hdtr != USER_ADDR_NULL) {
3800 		caddr_t hdtrp;
3801 
3802 		bzero(&user_hdtr, sizeof(user_hdtr));
3803 		if (is_p_64bit_process) {
3804 			hdtrp = (caddr_t)&user64_hdtr;
3805 			sizeof_hdtr = sizeof(user64_hdtr);
3806 		} else {
3807 			hdtrp = (caddr_t)&user32_hdtr;
3808 			sizeof_hdtr = sizeof(user32_hdtr);
3809 		}
3810 		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3811 		if (error) {
3812 			goto done2;
3813 		}
3814 		if (is_p_64bit_process) {
3815 			user_hdtr.headers = user64_hdtr.headers;
3816 			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3817 			user_hdtr.trailers = user64_hdtr.trailers;
3818 			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3819 		} else {
3820 			user_hdtr.headers = user32_hdtr.headers;
3821 			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3822 			user_hdtr.trailers = user32_hdtr.trailers;
3823 			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3824 		}
3825 
3826 		/*
3827 		 * Send any headers. Wimp out and use writev(2).
3828 		 */
3829 		if (user_hdtr.headers != USER_ADDR_NULL) {
3830 			bzero(&nuap, sizeof(struct writev_args));
3831 			nuap.fd = uap->s;
3832 			nuap.iovp = user_hdtr.headers;
3833 			nuap.iovcnt = user_hdtr.hdr_cnt;
3834 			error = writev_nocancel(p, &nuap, &writev_retval);
3835 			if (error) {
3836 				goto done2;
3837 			}
3838 			sbytes += writev_retval;
3839 		}
3840 	}
3841 
3842 	/*
3843 	 * Get the file size for 2 reasons:
3844 	 *  1. We don't want to allocate more mbufs than necessary
3845 	 *  2. We don't want to read past the end of file
3846 	 */
3847 	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3848 		goto done2;
3849 	}
3850 
3851 	/*
3852 	 * Simply read file data into a chain of mbufs that used with scatter
3853 	 * gather reads. We're not (yet?) setup to use zero copy external
3854 	 * mbufs that point to the file pages.
3855 	 */
3856 	socket_lock(so, 1);
3857 	error = sblock(&so->so_snd, SBL_WAIT);
3858 	if (error) {
3859 		socket_unlock(so, 1);
3860 		goto done2;
3861 	}
3862 	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3863 		mbuf_ref_t m0 = NULL;
3864 		mbuf_t  m;
3865 		unsigned int    nbufs = SFUIOBUFS, i;
3866 		uio_t   auio;
3867 		UIO_STACKBUF(uio_buf, SFUIOBUFS);               /* 1KB !!! */
3868 		size_t  uiolen;
3869 		user_ssize_t    rlen;
3870 		off_t   pgoff;
3871 		size_t  pktlen;
3872 		boolean_t jumbocl;
3873 
3874 		/*
3875 		 * Calculate the amount to transfer.
3876 		 * Align to round number of pages.
3877 		 * Not to exceed send socket buffer,
3878 		 * the EOF, or the passed in nbytes.
3879 		 */
3880 		xfsize = sbspace(&so->so_snd);
3881 
3882 		if (xfsize <= 0) {
3883 			if (so->so_state & SS_CANTSENDMORE) {
3884 				error = EPIPE;
3885 				goto done3;
3886 			} else if ((so->so_state & SS_NBIO)) {
3887 				error = EAGAIN;
3888 				goto done3;
3889 			} else {
3890 				xfsize = PAGE_SIZE;
3891 			}
3892 		}
3893 
3894 		if (xfsize > SENDFILE_MAX_BYTES) {
3895 			xfsize = SENDFILE_MAX_BYTES;
3896 		} else if (xfsize > PAGE_SIZE) {
3897 			xfsize = trunc_page(xfsize);
3898 		}
3899 		pgoff = off & PAGE_MASK_64;
3900 		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3901 			xfsize = PAGE_SIZE_64 - pgoff;
3902 		}
3903 		if (nbytes && xfsize > (nbytes - sbytes)) {
3904 			xfsize = nbytes - sbytes;
3905 		}
3906 		if (xfsize <= 0) {
3907 			break;
3908 		}
3909 		if (off + xfsize > file_size) {
3910 			xfsize = file_size - off;
3911 		}
3912 		if (xfsize <= 0) {
3913 			break;
3914 		}
3915 
3916 		/*
3917 		 * Attempt to use larger than system page-size clusters for
3918 		 * large writes only if there is a jumbo cluster pool and
3919 		 * if the socket is marked accordingly.
3920 		 */
3921 		jumbocl = sosendjcl && njcl > 0 &&
3922 		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3923 
3924 		socket_unlock(so, 0);
3925 		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3926 		pktlen = mbuf_pkthdr_maxlen(m0);
3927 		if (pktlen < (size_t)xfsize) {
3928 			xfsize = pktlen;
3929 		}
3930 
3931 		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3932 		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
3933 		if (auio == NULL) {
3934 			DBG_PRINTF("sendfile failed. nbufs = %d. %s", nbufs,
3935 			    "File a radar related to rdar://10146739.\n");
3936 			mbuf_freem(m0);
3937 			error = ENXIO;
3938 			socket_lock(so, 0);
3939 			goto done3;
3940 		}
3941 
3942 		for (i = 0, m = m0, uiolen = 0;
3943 		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3944 		    i++, m = mbuf_next(m)) {
3945 			size_t mlen = mbuf_maxlen(m);
3946 
3947 			if (mlen + uiolen > (size_t)xfsize) {
3948 				mlen = xfsize - uiolen;
3949 			}
3950 			mbuf_setlen(m, mlen);
3951 			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3952 			    mlen);
3953 			uiolen += mlen;
3954 		}
3955 
3956 		if (xfsize != uio_resid(auio)) {
3957 			DBG_PRINTF("sendfile: xfsize: %lld != uio_resid(auio): "
3958 			    "%lld\n", xfsize, (long long)uio_resid(auio));
3959 		}
3960 
3961 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3962 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3963 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3964 		error = fo_read(fp, auio, FOF_OFFSET, &context);
3965 		socket_lock(so, 0);
3966 		if (error != 0) {
3967 			if (uio_resid(auio) != xfsize && (error == ERESTART ||
3968 			    error == EINTR || error == EWOULDBLOCK)) {
3969 				error = 0;
3970 			} else {
3971 				mbuf_freem(m0);
3972 				goto done3;
3973 			}
3974 		}
3975 		xfsize -= uio_resid(auio);
3976 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3977 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3978 		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3979 
3980 		if (xfsize == 0) {
3981 			break;
3982 		}
3983 		if (xfsize + off > file_size) {
3984 			DBG_PRINTF("sendfile: xfsize: %lld + off: %lld > file_size:"
3985 			    "%lld\n", xfsize, off, file_size);
3986 		}
3987 		for (i = 0, m = m0, rlen = 0;
3988 		    i < nbufs && m != NULL && rlen < xfsize;
3989 		    i++, m = mbuf_next(m)) {
3990 			size_t mlen = mbuf_maxlen(m);
3991 
3992 			if (rlen + mlen > (size_t)xfsize) {
3993 				mlen = xfsize - rlen;
3994 			}
3995 			mbuf_setlen(m, mlen);
3996 
3997 			rlen += mlen;
3998 		}
3999 		mbuf_pkthdr_setlen(m0, xfsize);
4000 
4001 retry_space:
4002 		/*
4003 		 * Make sure that the socket is still able to take more data.
4004 		 * CANTSENDMORE being true usually means that the connection
4005 		 * was closed. so_error is true when an error was sensed after
4006 		 * a previous send.
4007 		 * The state is checked after the page mapping and buffer
4008 		 * allocation above since those operations may block and make
4009 		 * any socket checks stale. From this point forward, nothing
4010 		 * blocks before the pru_send (or more accurately, any blocking
4011 		 * results in a loop back to here to re-check).
4012 		 */
4013 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
4014 			if (so->so_state & SS_CANTSENDMORE) {
4015 				error = EPIPE;
4016 			} else {
4017 				error = so->so_error;
4018 				so->so_error = 0;
4019 			}
4020 			m_freem(m0);
4021 			goto done3;
4022 		}
4023 		/*
4024 		 * Wait for socket space to become available. We do this just
4025 		 * after checking the connection state above in order to avoid
4026 		 * a race condition with sbwait().
4027 		 */
4028 		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
4029 			if (so->so_state & SS_NBIO) {
4030 				m_freem(m0);
4031 				error = EAGAIN;
4032 				goto done3;
4033 			}
4034 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4035 			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
4036 			error = sbwait(&so->so_snd);
4037 			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4038 			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
4039 			/*
4040 			 * An error from sbwait usually indicates that we've
4041 			 * been interrupted by a signal. If we've sent anything
4042 			 * then return bytes sent, otherwise return the error.
4043 			 */
4044 			if (error) {
4045 				m_freem(m0);
4046 				goto done3;
4047 			}
4048 			goto retry_space;
4049 		}
4050 
4051 		mbuf_ref_t  control = NULL;
4052 		{
4053 			/*
4054 			 * Socket filter processing
4055 			 */
4056 
4057 			error = sflt_data_out(so, NULL, &m0, &control, 0);
4058 			if (error) {
4059 				if (error == EJUSTRETURN) {
4060 					error = 0;
4061 					continue;
4062 				}
4063 				goto done3;
4064 			}
4065 			/*
4066 			 * End Socket filter processing
4067 			 */
4068 		}
4069 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4070 		    uap->s, 0, 0, 0, 0);
4071 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
4072 		    NULL, control, p);
4073 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4074 		    uap->s, 0, 0, 0, 0);
4075 		if (error) {
4076 			goto done3;
4077 		}
4078 	}
4079 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
4080 	/*
4081 	 * Send trailers. Wimp out and use writev(2).
4082 	 */
4083 	if (uap->hdtr != USER_ADDR_NULL &&
4084 	    user_hdtr.trailers != USER_ADDR_NULL) {
4085 		bzero(&nuap, sizeof(struct writev_args));
4086 		nuap.fd = uap->s;
4087 		nuap.iovp = user_hdtr.trailers;
4088 		nuap.iovcnt = user_hdtr.trl_cnt;
4089 		error = writev_nocancel(p, &nuap, &writev_retval);
4090 		if (error) {
4091 			goto done2;
4092 		}
4093 		sbytes += writev_retval;
4094 	}
4095 done2:
4096 	file_drop(uap->s);
4097 done1:
4098 	file_drop(uap->fd);
4099 done:
4100 	if (uap->nbytes != USER_ADDR_NULL) {
4101 		/* XXX this appears bogus for some early failure conditions */
4102 		copyout(&sbytes, uap->nbytes, sizeof(off_t));
4103 	}
4104 	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
4105 	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
4106 	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
4107 	return error;
4108 done3:
4109 	sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
4110 	goto done2;
4111 }
4112 
4113 
4114 #endif /* SENDFILE */
4115