1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 #include <sys/types.h>
96
97 #include <security/audit/audit.h>
98
99 #include <sys/kdebug.h>
100 #include <sys/sysproto.h>
101 #include <netinet/in.h>
102 #include <net/route.h>
103 #include <netinet/in_pcb.h>
104
105 #include <os/log.h>
106 #include <os/ptrtools.h>
107
108 #include <os/log.h>
109
110 #if CONFIG_MACF_SOCKET_SUBSET
111 #include <security/mac_framework.h>
112 #endif /* MAC_SOCKET_SUBSET */
113
114 #define f_flag fp_glob->fg_flag
115 #define f_ops fp_glob->fg_ops
116
117 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
118 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
119 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
120 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
121 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
122 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
123 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
124 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
125 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
126 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
127 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
128 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
129 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
130 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
131 #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
132 #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
133
134 /* Forward declarations for referenced types */
135 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
136 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
137 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
138 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
139 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
140 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
141 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
142 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
143 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
144 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
145 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
146 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
147 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
148 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
149 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
150
151 static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
152 int, int32_ref_t );
153 static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
154 int32_ref_t);
155 static int connectit(socket_ref_t, sockaddr_ref_t);
156 static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
157 size_t, boolean_t);
158 static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
159 user_addr_t, size_t, boolean_t);
160 #if SENDFILE
161 static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
162 boolean_t);
163 #endif /* SENDFILE */
164 static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
165 static int connectitx(socket_ref_t, sockaddr_ref_t,
166 sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
167 sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
168 static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
169 int_ref_t);
170 static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);
171
172 #if DEBUG || DEVELOPMENT
173 static int internalize_user_msghdr_array(const void_ptr_t, int, int,
174 u_int count, user_msghdr_x_ptr_t, uio_ref_ptr_t);
175
176 static void externalize_user_msghdr_array(void_ptr_t, int, int, u_int count,
177 const user_msghdr_x_ptr_t, uio_ref_ptr_t);
178
179 static void free_uio_array(uio_ref_ptr_t, u_int count);
180 static boolean_t uio_array_is_valid(uio_ref_ptr_t, u_int count);
181 #endif /* DEBUG || DEVELOPMENT */
182 static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
183 u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
184 static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
185 u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);
186
187 static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
188 static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
189 static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
190 static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
191 socklen_ref_t, int_ref_t, socket_ref_t);
192
193 SYSCTL_DECL(_kern_ipc);
194
195 #define SO_MAX_MSG_X_DEFAULT 256
196
197 static u_int somaxsendmsgx = SO_MAX_MSG_X_DEFAULT;
198 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
199 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
200
201 static u_int somaxrecvmsgx = SO_MAX_MSG_X_DEFAULT;
202 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
203 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
204
205 static u_int missingpktinfo = 0;
206 SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
207 CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");
208
209 static int do_recvmsg_x_donttrunc = 0;
210 SYSCTL_INT(_kern_ipc, OID_AUTO, do_recvmsg_x_donttrunc,
211 CTLFLAG_RW | CTLFLAG_LOCKED, &do_recvmsg_x_donttrunc, 0, "");
212
/*
 * Debug logging helpers.
 *
 * DEBUG/DEVELOPMENT kernels:
 *	- uipc_debug is exported read/write as the "debug" OID under _kern_ipc.
 *	- DBG_PRINTF() forwards its arguments to os_log() only while
 *	  uipc_debug is non-zero.
 *	- DEBUG_KERNEL_ADDRPERM() yields its argument unchanged.
 *
 * Other kernels:
 *	- DBG_PRINTF() expands to an empty statement.
 *	- DEBUG_KERNEL_ADDRPERM() defers to VM_KERNEL_ADDRPERM().
 *
 * Both DBG_PRINTF() variants are wrapped in do { } while (0) so that an
 * invocation followed by ';' is always exactly one statement.  A bare
 * "if (...) { ... }" expansion would not be usable as the body of an
 * un-braced if/else.
 */
#if DEBUG || DEVELOPMENT
static int uipc_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &uipc_debug, 0, "");

#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) do {                                    \
	if (uipc_debug != 0) {                                  \
	        os_log(OS_LOG_DEFAULT, __VA_ARGS__);            \
	}                                                       \
} while (0)
#else /* !(DEBUG || DEVELOPMENT) */
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif /* DEBUG || DEVELOPMENT */
226
227
228 /*
229 * Values for sendmsg_x_mode
230 * 0: default
231 * 1: sendit loop one at a time
232 * 2: old implementation
233 */
234 static u_int sendmsg_x_mode = 0;
235 SYSCTL_UINT(_kern_ipc, OID_AUTO, sendmsg_x_mode,
236 CTLFLAG_RW | CTLFLAG_LOCKED, &sendmsg_x_mode, 0, "");
237
238 /*
239 * System call interface to the socket abstraction.
240 */
241
242 extern const struct fileops socketops;
243
244 /*
245 * Returns: 0 Success
246 * EACCES Mandatory Access Control failure
247 * falloc:ENFILE
248 * falloc:EMFILE
249 * falloc:ENOMEM
250 * socreate:EAFNOSUPPORT
251 * socreate:EPROTOTYPE
252 * socreate:EPROTONOSUPPORT
253 * socreate:ENOBUFS
254 * socreate:ENOMEM
255 * socreate:??? [other protocol families, IPSEC]
256 */
257 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)258 socket(proc_ref_t p,
259 struct socket_args *uap,
260 int32_ref_t retval)
261 {
262 return socket_common(p, uap->domain, uap->type, uap->protocol,
263 proc_selfpid(), retval, 0);
264 }
265
266 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)267 socket_delegate(proc_ref_t p,
268 struct socket_delegate_args *uap,
269 int32_ref_t retval)
270 {
271 return socket_common(p, uap->domain, uap->type, uap->protocol,
272 uap->epid, retval, 1);
273 }
274
275 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)276 socket_common(proc_ref_t p,
277 int domain,
278 int type,
279 int protocol,
280 pid_t epid,
281 int32_ref_t retval,
282 int delegate)
283 {
284 socket_ref_t so;
285 fileproc_ref_t fp;
286 int fd, error;
287
288 AUDIT_ARG(socket, domain, type, protocol);
289 #if CONFIG_MACF_SOCKET_SUBSET
290 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
291 type, protocol)) != 0) {
292 return error;
293 }
294 #endif /* MAC_SOCKET_SUBSET */
295
296 if (delegate) {
297 error = priv_check_cred(kauth_cred_get(),
298 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
299 if (error) {
300 return EACCES;
301 }
302 }
303
304 error = falloc(p, &fp, &fd, vfs_context_current());
305 if (error) {
306 return error;
307 }
308 fp->f_flag = FREAD | FWRITE;
309 fp->f_ops = &socketops;
310
311 if (delegate) {
312 error = socreate_delegate(domain, &so, type, protocol, epid);
313 } else {
314 error = socreate(domain, &so, type, protocol);
315 }
316
317 if (error) {
318 fp_free(p, fd, fp);
319 } else {
320 fp_set_data(fp, so);
321
322 proc_fdlock(p);
323 procfdtbl_releasefd(p, fd, NULL);
324
325 if (ENTR_SHOULDTRACE) {
326 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
327 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
328 }
329 fp_drop(p, fd, fp, 1);
330 proc_fdunlock(p);
331
332 *retval = fd;
333 }
334 return error;
335 }
336
337 /*
338 * Returns: 0 Success
339 * EDESTADDRREQ Destination address required
340 * EBADF Bad file descriptor
341 * EACCES Mandatory Access Control failure
342 * file_socket:ENOTSOCK
343 * file_socket:EBADF
344 * getsockaddr:ENAMETOOLONG Filename too long
345 * getsockaddr:EINVAL Invalid argument
346 * getsockaddr:ENOMEM Not enough space
347 * getsockaddr:EFAULT Bad address
348 * sobindlock:???
349 */
350 /* ARGSUSED */
351 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)352 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
353 {
354 struct sockaddr_storage ss;
355 sockaddr_ref_t sa = NULL;
356 socket_ref_t so;
357 boolean_t want_free = TRUE;
358 int error;
359
360 AUDIT_ARG(fd, uap->s);
361 error = file_socket(uap->s, &so);
362 if (error != 0) {
363 return error;
364 }
365 if (so == NULL) {
366 error = EBADF;
367 goto out;
368 }
369 if (uap->name == USER_ADDR_NULL) {
370 error = EDESTADDRREQ;
371 goto out;
372 }
373 if (uap->namelen > sizeof(ss)) {
374 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
375 } else {
376 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
377 if (error == 0) {
378 sa = (sockaddr_ref_t)&ss;
379 want_free = FALSE;
380 }
381 }
382 if (error != 0) {
383 goto out;
384 }
385 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
386 #if CONFIG_MACF_SOCKET_SUBSET
387 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
388 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
389 error = sobindlock(so, sa, 1); /* will lock socket */
390 }
391 #else
392 error = sobindlock(so, sa, 1); /* will lock socket */
393 #endif /* MAC_SOCKET_SUBSET */
394 if (want_free) {
395 free_sockaddr(sa);
396 }
397 out:
398 file_drop(uap->s);
399 return error;
400 }
401
402 /*
403 * Returns: 0 Success
404 * EBADF
405 * EACCES Mandatory Access Control failure
406 * file_socket:ENOTSOCK
407 * file_socket:EBADF
408 * solisten:EINVAL
409 * solisten:EOPNOTSUPP
410 * solisten:???
411 */
412 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)413 listen(__unused proc_ref_t p, struct listen_args *uap,
414 __unused int32_ref_t retval)
415 {
416 int error;
417 socket_ref_t so;
418
419 AUDIT_ARG(fd, uap->s);
420 error = file_socket(uap->s, &so);
421 if (error) {
422 return error;
423 }
424 if (so != NULL)
425 #if CONFIG_MACF_SOCKET_SUBSET
426 {
427 error = mac_socket_check_listen(kauth_cred_get(), so);
428 if (error == 0) {
429 error = solisten(so, uap->backlog);
430 }
431 }
432 #else
433 { error = solisten(so, uap->backlog);}
434 #endif /* MAC_SOCKET_SUBSET */
435 else {
436 error = EBADF;
437 }
438
439 file_drop(uap->s);
440 return error;
441 }
442
443 /*
444 * Returns: fp_get_ftype:EBADF Bad file descriptor
445 * fp_get_ftype:ENOTSOCK Socket operation on non-socket
446 * :EFAULT Bad address on copyin/copyout
447 * :EBADF Bad file descriptor
448 * :EOPNOTSUPP Operation not supported on socket
449 * :EINVAL Invalid argument
450 * :EWOULDBLOCK Operation would block
451 * :ECONNABORTED Connection aborted
452 * :EINTR Interrupted function
453 * :EACCES Mandatory Access Control failure
454 * falloc:ENFILE Too many files open in system
455 * falloc:EMFILE Too many open files
456 * falloc:ENOMEM Not enough space
457 * 0 Success
458 */
459 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)460 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
461 int32_ref_t retval)
462 {
463 fileproc_ref_t fp;
464 sockaddr_ref_t sa = NULL;
465 socklen_t namelen;
466 int error;
467 socket_ref_t head;
468 socket_ref_t so = NULL;
469 lck_mtx_t *mutex_held;
470 int fd = uap->s;
471 int newfd;
472 unsigned int fflag;
473 int dosocklock = 0;
474
475 *retval = -1;
476
477 AUDIT_ARG(fd, uap->s);
478
479 if (uap->name) {
480 error = copyin(uap->anamelen, (caddr_t)&namelen,
481 sizeof(socklen_t));
482 if (error) {
483 return error;
484 }
485 }
486 error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
487 if (error) {
488 return error;
489 }
490 head = (struct socket *)fp_get_data(fp);
491
492 #if CONFIG_MACF_SOCKET_SUBSET
493 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
494 goto out;
495 }
496 #endif /* MAC_SOCKET_SUBSET */
497
498 socket_lock(head, 1);
499
500 if (head->so_proto->pr_getlock != NULL) {
501 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
502 dosocklock = 1;
503 } else {
504 mutex_held = head->so_proto->pr_domain->dom_mtx;
505 dosocklock = 0;
506 }
507
508 if ((head->so_options & SO_ACCEPTCONN) == 0) {
509 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
510 error = EOPNOTSUPP;
511 } else {
512 /* POSIX: The socket is not accepting connections */
513 error = EINVAL;
514 }
515 socket_unlock(head, 1);
516 goto out;
517 }
518 check_again:
519 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
520 socket_unlock(head, 1);
521 error = EWOULDBLOCK;
522 goto out;
523 }
524 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
525 if (head->so_state & SS_CANTRCVMORE) {
526 head->so_error = ECONNABORTED;
527 break;
528 }
529 if (head->so_usecount < 1) {
530 panic("accept: head=%p refcount=%d", head,
531 head->so_usecount);
532 }
533 error = msleep((caddr_t)&head->so_timeo, mutex_held,
534 PSOCK | PCATCH, "accept", 0);
535 if (head->so_usecount < 1) {
536 panic("accept: 2 head=%p refcount=%d", head,
537 head->so_usecount);
538 }
539 if ((head->so_state & SS_DRAINING)) {
540 error = ECONNABORTED;
541 }
542 if (error) {
543 socket_unlock(head, 1);
544 goto out;
545 }
546 }
547 if (head->so_error) {
548 error = head->so_error;
549 head->so_error = 0;
550 socket_unlock(head, 1);
551 goto out;
552 }
553
554 /*
555 * At this point we know that there is at least one connection
556 * ready to be accepted. Remove it from the queue prior to
557 * allocating the file descriptor for it since falloc() may
558 * block allowing another process to accept the connection
559 * instead.
560 */
561 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
562
563 so_acquire_accept_list(head, NULL);
564 if (TAILQ_EMPTY(&head->so_comp)) {
565 so_release_accept_list(head);
566 goto check_again;
567 }
568
569 so = TAILQ_FIRST(&head->so_comp);
570 TAILQ_REMOVE(&head->so_comp, so, so_list);
571 /*
572 * Acquire the lock of the new connection
573 * as we may be in the process of receiving
574 * a packet that may change its so_state
575 * (e.g.: a TCP FIN).
576 */
577 if (dosocklock) {
578 socket_lock(so, 0);
579 }
580 so->so_head = NULL;
581 so->so_state &= ~SS_COMP;
582 if (dosocklock) {
583 socket_unlock(so, 0);
584 }
585 head->so_qlen--;
586 so_release_accept_list(head);
587
588 /* unlock head to avoid deadlock with select, keep a ref on head */
589 socket_unlock(head, 0);
590
591 #if CONFIG_MACF_SOCKET_SUBSET
592 /*
593 * Pass the pre-accepted socket to the MAC framework. This is
594 * cheaper than allocating a file descriptor for the socket,
595 * calling the protocol accept callback, and possibly freeing
596 * the file descriptor should the MAC check fails.
597 */
598 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
599 socket_lock(so, 1);
600 so->so_state &= ~SS_NOFDREF;
601 socket_unlock(so, 1);
602 soclose(so);
603 /* Drop reference on listening socket */
604 sodereference(head);
605 goto out;
606 }
607 #endif /* MAC_SOCKET_SUBSET */
608
609 /*
610 * Pass the pre-accepted socket to any interested socket filter(s).
611 * Upon failure, the socket would have been closed by the callee.
612 */
613 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
614 /* Drop reference on listening socket */
615 sodereference(head);
616 /* Propagate socket filter's error code to the caller */
617 goto out;
618 }
619
620 fflag = fp->f_flag;
621 error = falloc(p, &fp, &newfd, vfs_context_current());
622 if (error) {
623 /*
624 * Probably ran out of file descriptors.
625 *
626 * <rdar://problem/8554930>
627 * Don't put this back on the socket like we used to, that
628 * just causes the client to spin. Drop the socket.
629 */
630 socket_lock(so, 1);
631 so->so_state &= ~SS_NOFDREF;
632 socket_unlock(so, 1);
633 soclose(so);
634 sodereference(head);
635 goto out;
636 }
637 *retval = newfd;
638 fp->f_flag = fflag;
639 fp->f_ops = &socketops;
640 fp_set_data(fp, so);
641
642 socket_lock(head, 0);
643 if (dosocklock) {
644 socket_lock(so, 1);
645 }
646
647 /* Sync socket non-blocking/async state with file flags */
648 if (fp->f_flag & FNONBLOCK) {
649 so->so_state |= SS_NBIO;
650 } else {
651 so->so_state &= ~SS_NBIO;
652 }
653
654 if (fp->f_flag & FASYNC) {
655 so->so_state |= SS_ASYNC;
656 so->so_rcv.sb_flags |= SB_ASYNC;
657 so->so_snd.sb_flags |= SB_ASYNC;
658 } else {
659 so->so_state &= ~SS_ASYNC;
660 so->so_rcv.sb_flags &= ~SB_ASYNC;
661 so->so_snd.sb_flags &= ~SB_ASYNC;
662 }
663
664 (void) soacceptlock(so, &sa, 0);
665 socket_unlock(head, 1);
666 if (sa == NULL) {
667 namelen = 0;
668 if (uap->name) {
669 goto gotnoname;
670 }
671 error = 0;
672 goto releasefd;
673 }
674 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
675
676 if (uap->name) {
677 socklen_t sa_len;
678
679 /* save sa_len before it is destroyed */
680 sa_len = sa->sa_len;
681 namelen = MIN(namelen, sa_len);
682 error = copyout(sa, uap->name, namelen);
683 if (!error) {
684 /* return the actual, untruncated address length */
685 namelen = sa_len;
686 }
687 gotnoname:
688 error = copyout((caddr_t)&namelen, uap->anamelen,
689 sizeof(socklen_t));
690 }
691 free_sockaddr(sa);
692
693 releasefd:
694 /*
695 * If the socket has been marked as inactive by sosetdefunct(),
696 * disallow further operations on it.
697 */
698 if (so->so_flags & SOF_DEFUNCT) {
699 sodefunct(current_proc(), so,
700 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
701 }
702
703 if (dosocklock) {
704 socket_unlock(so, 1);
705 }
706
707 proc_fdlock(p);
708 procfdtbl_releasefd(p, newfd, NULL);
709 fp_drop(p, newfd, fp, 1);
710 proc_fdunlock(p);
711
712 out:
713 if (error == 0 && ENTR_SHOULDTRACE) {
714 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
715 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
716 }
717
718 file_drop(fd);
719 return error;
720 }
721
722 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)723 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
724 {
725 __pthread_testcancel(1);
726 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
727 retval);
728 }
729
730 /*
731 * Returns: 0 Success
732 * EBADF Bad file descriptor
733 * EALREADY Connection already in progress
734 * EINPROGRESS Operation in progress
735 * ECONNABORTED Connection aborted
736 * EINTR Interrupted function
737 * EACCES Mandatory Access Control failure
738 * file_socket:ENOTSOCK
739 * file_socket:EBADF
740 * getsockaddr:ENAMETOOLONG Filename too long
741 * getsockaddr:EINVAL Invalid argument
742 * getsockaddr:ENOMEM Not enough space
743 * getsockaddr:EFAULT Bad address
744 * soconnectlock:EOPNOTSUPP
745 * soconnectlock:EISCONN
746 * soconnectlock:??? [depends on protocol, filters]
747 * msleep:EINTR
748 *
749 * Imputed: so_error error may be set from so_error, which
750 * may have been set by soconnectlock.
751 */
752 /* ARGSUSED */
753 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)754 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
755 {
756 __pthread_testcancel(1);
757 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
758 retval);
759 }
760
761 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)762 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
763 {
764 #pragma unused(p, retval)
765 socket_ref_t so;
766 struct sockaddr_storage ss;
767 sockaddr_ref_t sa = NULL;
768 int error;
769 int fd = uap->s;
770 boolean_t dgram;
771
772 AUDIT_ARG(fd, uap->s);
773 error = file_socket(fd, &so);
774 if (error != 0) {
775 return error;
776 }
777 if (so == NULL) {
778 error = EBADF;
779 goto out;
780 }
781
782 /*
783 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
784 * if this is a datagram socket; translate for other types.
785 */
786 dgram = (so->so_type == SOCK_DGRAM);
787
788 /* Get socket address now before we obtain socket lock */
789 if (uap->namelen > sizeof(ss)) {
790 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
791 } else {
792 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
793 if (error == 0) {
794 sa = (sockaddr_ref_t)&ss;
795 }
796 }
797 if (error != 0) {
798 goto out;
799 }
800
801 error = connectit(so, sa);
802
803 if (sa != NULL && sa != SA(&ss)) {
804 free_sockaddr(sa);
805 }
806 if (error == ERESTART) {
807 error = EINTR;
808 }
809 out:
810 file_drop(fd);
811 return error;
812 }
813
814 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)815 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
816 {
817 #pragma unused(p, retval)
818 struct sockaddr_storage ss, sd;
819 sockaddr_ref_t src = NULL, dst = NULL;
820 socket_ref_t so;
821 int error, error1, fd = uap->socket;
822 boolean_t dgram;
823 sae_connid_t cid = SAE_CONNID_ANY;
824 struct user32_sa_endpoints ep32;
825 struct user64_sa_endpoints ep64;
826 struct user_sa_endpoints ep;
827 user_ssize_t bytes_written = 0;
828 struct user_iovec *iovp;
829 uio_t auio = NULL;
830
831 AUDIT_ARG(fd, uap->socket);
832 error = file_socket(fd, &so);
833 if (error != 0) {
834 return error;
835 }
836 if (so == NULL) {
837 error = EBADF;
838 goto out;
839 }
840
841 if (uap->endpoints == USER_ADDR_NULL) {
842 error = EINVAL;
843 goto out;
844 }
845
846 if (IS_64BIT_PROCESS(p)) {
847 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
848 if (error != 0) {
849 goto out;
850 }
851
852 ep.sae_srcif = ep64.sae_srcif;
853 ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
854 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
855 ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
856 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
857 } else {
858 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
859 if (error != 0) {
860 goto out;
861 }
862
863 ep.sae_srcif = ep32.sae_srcif;
864 ep.sae_srcaddr = ep32.sae_srcaddr;
865 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
866 ep.sae_dstaddr = ep32.sae_dstaddr;
867 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
868 }
869
870 /*
871 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
872 * if this is a datagram socket; translate for other types.
873 */
874 dgram = (so->so_type == SOCK_DGRAM);
875
876 /* Get socket address now before we obtain socket lock */
877 if (ep.sae_srcaddr != USER_ADDR_NULL) {
878 if (ep.sae_srcaddrlen > sizeof(ss)) {
879 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
880 } else {
881 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
882 if (error == 0) {
883 src = (sockaddr_ref_t)&ss;
884 }
885 }
886
887 if (error) {
888 goto out;
889 }
890 }
891
892 if (ep.sae_dstaddr == USER_ADDR_NULL) {
893 error = EINVAL;
894 goto out;
895 }
896
897 /* Get socket address now before we obtain socket lock */
898 if (ep.sae_dstaddrlen > sizeof(sd)) {
899 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
900 } else {
901 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
902 if (error == 0) {
903 dst = (sockaddr_ref_t)&sd;
904 }
905 }
906
907 if (error) {
908 goto out;
909 }
910
911 VERIFY(dst != NULL);
912
913 if (uap->iov != USER_ADDR_NULL) {
914 /* Verify range before calling uio_create() */
915 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
916 error = EINVAL;
917 goto out;
918 }
919
920 if (uap->len == USER_ADDR_NULL) {
921 error = EINVAL;
922 goto out;
923 }
924
925 /* allocate a uio to hold the number of iovecs passed */
926 auio = uio_create(uap->iovcnt, 0,
927 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
928 UIO_WRITE);
929
930 if (auio == NULL) {
931 error = ENOMEM;
932 goto out;
933 }
934
935 /*
936 * get location of iovecs within the uio.
937 * then copyin the iovecs from user space.
938 */
939 iovp = uio_iovsaddr(auio);
940 if (iovp == NULL) {
941 error = ENOMEM;
942 goto out;
943 }
944 error = copyin_user_iovec_array(uap->iov,
945 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
946 uap->iovcnt, iovp);
947 if (error != 0) {
948 goto out;
949 }
950
951 /* finish setup of uio_t */
952 error = uio_calculateresid(auio);
953 if (error != 0) {
954 goto out;
955 }
956 }
957
958 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
959 &cid, auio, uap->flags, &bytes_written);
960 if (error == ERESTART) {
961 error = EINTR;
962 }
963
964 if (uap->len != USER_ADDR_NULL) {
965 if (IS_64BIT_PROCESS(p)) {
966 error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
967 } else {
968 error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
969 }
970 /* give precedence to connectitx errors */
971 if ((error1 != 0) && (error == 0)) {
972 error = error1;
973 }
974 }
975
976 if (uap->connid != USER_ADDR_NULL) {
977 error1 = copyout(&cid, uap->connid, sizeof(cid));
978 /* give precedence to connectitx errors */
979 if ((error1 != 0) && (error == 0)) {
980 error = error1;
981 }
982 }
983 out:
984 file_drop(fd);
985 if (auio != NULL) {
986 uio_free(auio);
987 }
988 if (src != NULL && src != SA(&ss)) {
989 free_sockaddr(src);
990 }
991 if (dst != NULL && dst != SA(&sd)) {
992 free_sockaddr(dst);
993 }
994 return error;
995 }
996
997 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)998 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
999 {
1000 /*
1001 * Due to similiarity with a POSIX interface, define as
1002 * an unofficial cancellation point.
1003 */
1004 __pthread_testcancel(1);
1005 return connectx_nocancel(p, uap, retval);
1006 }
1007
1008 static int
connectit(struct socket * so,sockaddr_ref_t sa)1009 connectit(struct socket *so, sockaddr_ref_t sa)
1010 {
1011 int error;
1012
1013 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
1014 #if CONFIG_MACF_SOCKET_SUBSET
1015 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
1016 return error;
1017 }
1018 #endif /* MAC_SOCKET_SUBSET */
1019
1020 socket_lock(so, 1);
1021 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1022 error = EALREADY;
1023 goto out;
1024 }
1025 error = soconnectlock(so, sa, 0);
1026 if (error != 0) {
1027 goto out;
1028 }
1029 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1030 error = EINPROGRESS;
1031 goto out;
1032 }
1033 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1034 lck_mtx_t *mutex_held;
1035
1036 if (so->so_proto->pr_getlock != NULL) {
1037 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1038 } else {
1039 mutex_held = so->so_proto->pr_domain->dom_mtx;
1040 }
1041 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1042 PSOCK | PCATCH, __func__, 0);
1043 if (so->so_state & SS_DRAINING) {
1044 error = ECONNABORTED;
1045 }
1046 if (error != 0) {
1047 break;
1048 }
1049 }
1050 if (error == 0) {
1051 error = so->so_error;
1052 so->so_error = 0;
1053 }
1054 out:
1055 socket_unlock(so, 1);
1056 return error;
1057 }
1058
1059 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1060 connectitx(struct socket *so, sockaddr_ref_t src,
1061 sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1062 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1063 user_ssize_t *bytes_written)
1064 {
1065 int error;
1066
1067 VERIFY(dst != NULL);
1068
1069 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1070 #if CONFIG_MACF_SOCKET_SUBSET
1071 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1072 return error;
1073 }
1074
1075 if (auio != NULL) {
1076 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1077 return error;
1078 }
1079 }
1080 #endif /* MAC_SOCKET_SUBSET */
1081
1082 socket_lock(so, 1);
1083 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1084 error = EALREADY;
1085 goto out;
1086 }
1087
1088 error = soconnectxlocked(so, src, dst, p, ifscope,
1089 aid, pcid, flags, NULL, 0, auio, bytes_written);
1090 if (error != 0) {
1091 goto out;
1092 }
1093 /*
1094 * If, after the call to soconnectxlocked the flag is still set (in case
1095 * data has been queued and the connect() has actually been triggered,
1096 * it will have been unset by the transport), we exit immediately. There
1097 * is no reason to wait on any event.
1098 */
1099 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1100 error = 0;
1101 goto out;
1102 }
1103 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1104 error = EINPROGRESS;
1105 goto out;
1106 }
1107 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1108 lck_mtx_t *mutex_held;
1109
1110 if (so->so_proto->pr_getlock != NULL) {
1111 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1112 } else {
1113 mutex_held = so->so_proto->pr_domain->dom_mtx;
1114 }
1115 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1116 PSOCK | PCATCH, __func__, 0);
1117 if (so->so_state & SS_DRAINING) {
1118 error = ECONNABORTED;
1119 }
1120 if (error != 0) {
1121 break;
1122 }
1123 }
1124 if (error == 0) {
1125 error = so->so_error;
1126 so->so_error = 0;
1127 }
1128 out:
1129 socket_unlock(so, 1);
1130 return error;
1131 }
1132
1133 int
peeloff(proc_ref_t p,struct peeloff_args * uap,int * retval)1134 peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
1135 {
1136 #pragma unused(p, uap, retval)
1137 /*
1138 * Due to similiarity with a POSIX interface, define as
1139 * an unofficial cancellation point.
1140 */
1141 __pthread_testcancel(1);
1142 return 0;
1143 }
1144
1145 int
disconnectx(proc_ref_t p,struct disconnectx_args * uap,int * retval)1146 disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1147 {
1148 /*
1149 * Due to similiarity with a POSIX interface, define as
1150 * an unofficial cancellation point.
1151 */
1152 __pthread_testcancel(1);
1153 return disconnectx_nocancel(p, uap, retval);
1154 }
1155
1156 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1157 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1158 {
1159 #pragma unused(p, retval)
1160 socket_ref_t so;
1161 int fd = uap->s;
1162 int error;
1163
1164 error = file_socket(fd, &so);
1165 if (error != 0) {
1166 return error;
1167 }
1168 if (so == NULL) {
1169 error = EBADF;
1170 goto out;
1171 }
1172
1173 error = sodisconnectx(so, uap->aid, uap->cid);
1174 out:
1175 file_drop(fd);
1176 return error;
1177 }
1178
1179 /*
1180 * Returns: 0 Success
1181 * socreate:EAFNOSUPPORT
1182 * socreate:EPROTOTYPE
1183 * socreate:EPROTONOSUPPORT
1184 * socreate:ENOBUFS
1185 * socreate:ENOMEM
1186 * socreate:EISCONN
1187 * socreate:??? [other protocol families, IPSEC]
1188 * falloc:ENFILE
1189 * falloc:EMFILE
1190 * falloc:ENOMEM
1191 * copyout:EFAULT
1192 * soconnect2:EINVAL
1193 * soconnect2:EPROTOTYPE
1194 * soconnect2:??? [other protocol families[
1195 */
1196 int
socketpair(proc_ref_t p,struct socketpair_args * uap,__unused int32_ref_t retval)1197 socketpair(proc_ref_t p, struct socketpair_args *uap,
1198 __unused int32_ref_t retval)
1199 {
1200 fileproc_ref_t fp1, fp2;
1201 socket_ref_t so1, so2;
1202 int fd, error, sv[2];
1203
1204 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1205 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1206 if (error) {
1207 return error;
1208 }
1209 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1210 if (error) {
1211 goto free1;
1212 }
1213
1214 error = falloc(p, &fp1, &fd, vfs_context_current());
1215 if (error) {
1216 goto free2;
1217 }
1218 fp1->f_flag = FREAD | FWRITE;
1219 fp1->f_ops = &socketops;
1220 fp_set_data(fp1, so1);
1221 sv[0] = fd;
1222
1223 error = falloc(p, &fp2, &fd, vfs_context_current());
1224 if (error) {
1225 goto free3;
1226 }
1227 fp2->f_flag = FREAD | FWRITE;
1228 fp2->f_ops = &socketops;
1229 fp_set_data(fp2, so2);
1230 sv[1] = fd;
1231
1232 error = soconnect2(so1, so2);
1233 if (error) {
1234 goto free4;
1235 }
1236 if (uap->type == SOCK_DGRAM) {
1237 /*
1238 * Datagram socket connection is asymmetric.
1239 */
1240 error = soconnect2(so2, so1);
1241 if (error) {
1242 goto free4;
1243 }
1244 }
1245
1246 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1247 goto free4;
1248 }
1249
1250 proc_fdlock(p);
1251 procfdtbl_releasefd(p, sv[0], NULL);
1252 procfdtbl_releasefd(p, sv[1], NULL);
1253 fp_drop(p, sv[0], fp1, 1);
1254 fp_drop(p, sv[1], fp2, 1);
1255 proc_fdunlock(p);
1256
1257 return 0;
1258 free4:
1259 fp_free(p, sv[1], fp2);
1260 free3:
1261 fp_free(p, sv[0], fp1);
1262 free2:
1263 (void) soclose(so2);
1264 free1:
1265 (void) soclose(so1);
1266 return error;
1267 }
1268
1269 /*
1270 * Returns: 0 Success
1271 * EINVAL
1272 * ENOBUFS
1273 * EBADF
1274 * EPIPE
1275 * EACCES Mandatory Access Control failure
1276 * file_socket:ENOTSOCK
1277 * file_socket:EBADF
1278 * getsockaddr:ENAMETOOLONG Filename too long
1279 * getsockaddr:EINVAL Invalid argument
1280 * getsockaddr:ENOMEM Not enough space
1281 * getsockaddr:EFAULT Bad address
1282 * <pru_sosend>:EACCES[TCP]
1283 * <pru_sosend>:EADDRINUSE[TCP]
1284 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1285 * <pru_sosend>:EAFNOSUPPORT[TCP]
1286 * <pru_sosend>:EAGAIN[TCP]
1287 * <pru_sosend>:EBADF
1288 * <pru_sosend>:ECONNRESET[TCP]
1289 * <pru_sosend>:EFAULT
1290 * <pru_sosend>:EHOSTUNREACH[TCP]
1291 * <pru_sosend>:EINTR
1292 * <pru_sosend>:EINVAL
1293 * <pru_sosend>:EISCONN[AF_INET]
1294 * <pru_sosend>:EMSGSIZE[TCP]
1295 * <pru_sosend>:ENETDOWN[TCP]
1296 * <pru_sosend>:ENETUNREACH[TCP]
1297 * <pru_sosend>:ENOBUFS
1298 * <pru_sosend>:ENOMEM[TCP]
1299 * <pru_sosend>:ENOTCONN[AF_INET]
1300 * <pru_sosend>:EOPNOTSUPP
1301 * <pru_sosend>:EPERM[TCP]
1302 * <pru_sosend>:EPIPE
1303 * <pru_sosend>:EWOULDBLOCK
1304 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1305 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1306 * <pru_sosend>:??? [value from so_error]
1307 * sockargs:???
1308 */
1309 static int
sendit(proc_ref_t p,struct socket * so,user_msghdr_ref_t mp,uio_t uiop,int flags,int32_ref_t retval)1310 sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
1311 int flags, int32_ref_t retval)
1312 {
1313 mbuf_ref_t control = NULL;
1314 struct sockaddr_storage ss;
1315 sockaddr_ref_t to = NULL;
1316 boolean_t want_free = TRUE;
1317 int error;
1318 user_ssize_t len;
1319
1320 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1321
1322 if (mp->msg_name != USER_ADDR_NULL) {
1323 if (mp->msg_namelen > sizeof(ss)) {
1324 error = getsockaddr(so, &to, mp->msg_name,
1325 mp->msg_namelen, TRUE);
1326 } else {
1327 error = getsockaddr_s(so, &ss, mp->msg_name,
1328 mp->msg_namelen, TRUE);
1329 if (error == 0) {
1330 to = (sockaddr_ref_t)&ss;
1331 want_free = FALSE;
1332 }
1333 }
1334 if (error != 0) {
1335 goto out;
1336 }
1337 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1338 }
1339 if (mp->msg_control != USER_ADDR_NULL) {
1340 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1341 error = EINVAL;
1342 goto bad;
1343 }
1344 error = sockargs(&control, mp->msg_control,
1345 mp->msg_controllen, MT_CONTROL);
1346 if (error != 0) {
1347 goto bad;
1348 }
1349 }
1350
1351 #if CONFIG_MACF_SOCKET_SUBSET
1352 /*
1353 * We check the state without holding the socket lock;
1354 * if a race condition occurs, it would simply result
1355 * in an extra call to the MAC check function.
1356 */
1357 if (to != NULL &&
1358 !(so->so_state & SS_DEFUNCT) &&
1359 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1360 if (control != NULL) {
1361 m_freem(control);
1362 }
1363
1364 goto bad;
1365 }
1366 #endif /* MAC_SOCKET_SUBSET */
1367
1368 len = uio_resid(uiop);
1369 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1370 control, flags);
1371 if (error != 0) {
1372 if (uio_resid(uiop) != len && (error == ERESTART ||
1373 error == EINTR || error == EWOULDBLOCK)) {
1374 error = 0;
1375 }
1376 /* Generation of SIGPIPE can be controlled per socket */
1377 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1378 !(flags & MSG_NOSIGNAL)) {
1379 psignal(p, SIGPIPE);
1380 }
1381 }
1382 if (error == 0) {
1383 *retval = (int)(len - uio_resid(uiop));
1384 }
1385 bad:
1386 if (want_free) {
1387 free_sockaddr(to);
1388 }
1389 out:
1390 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1391
1392 return error;
1393 }
1394
1395 /*
1396 * Returns: 0 Success
1397 * ENOMEM
1398 * sendit:??? [see sendit definition in this file]
1399 * write:??? [4056224: applicable for pipes]
1400 */
1401 int
sendto(proc_ref_t p,struct sendto_args * uap,int32_ref_t retval)1402 sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1403 {
1404 __pthread_testcancel(1);
1405 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1406 }
1407
1408 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1409 sendto_nocancel(proc_ref_t p,
1410 struct sendto_nocancel_args *uap,
1411 int32_ref_t retval)
1412 {
1413 struct user_msghdr msg;
1414 int error;
1415 uio_t auio = NULL;
1416 socket_ref_t so;
1417
1418 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1419 AUDIT_ARG(fd, uap->s);
1420
1421 if (uap->flags & MSG_SKIPCFIL) {
1422 error = EPERM;
1423 goto done;
1424 }
1425
1426 if (uap->len > LONG_MAX) {
1427 error = EINVAL;
1428 goto done;
1429 }
1430
1431 auio = uio_create(1, 0,
1432 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1433 UIO_WRITE);
1434 if (auio == NULL) {
1435 error = ENOMEM;
1436 goto done;
1437 }
1438 uio_addiov(auio, uap->buf, uap->len);
1439
1440 msg.msg_name = uap->to;
1441 msg.msg_namelen = uap->tolen;
1442 /* no need to set up msg_iov. sendit uses uio_t we send it */
1443 msg.msg_iov = 0;
1444 msg.msg_iovlen = 0;
1445 msg.msg_control = 0;
1446 msg.msg_flags = 0;
1447
1448 error = file_socket(uap->s, &so);
1449 if (error) {
1450 goto done;
1451 }
1452
1453 if (so == NULL) {
1454 error = EBADF;
1455 } else {
1456 error = sendit(p, so, &msg, auio, uap->flags, retval);
1457 }
1458
1459 file_drop(uap->s);
1460 done:
1461 if (auio != NULL) {
1462 uio_free(auio);
1463 }
1464
1465 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1466
1467 return error;
1468 }
1469
1470 /*
1471 * Returns: 0 Success
1472 * ENOBUFS
1473 * copyin:EFAULT
1474 * sendit:??? [see sendit definition in this file]
1475 */
1476 int
sendmsg(proc_ref_t p,struct sendmsg_args * uap,int32_ref_t retval)1477 sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1478 {
1479 __pthread_testcancel(1);
1480 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1481 retval);
1482 }
1483
1484 int
sendmsg_nocancel(proc_ref_t p,struct sendmsg_nocancel_args * uap,int32_ref_t retval)1485 sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1486 int32_ref_t retval)
1487 {
1488 struct user32_msghdr msg32;
1489 struct user64_msghdr msg64;
1490 struct user_msghdr user_msg;
1491 caddr_t msghdrp;
1492 int size_of_msghdr;
1493 int error;
1494 uio_t auio = NULL;
1495 struct user_iovec *iovp;
1496 socket_ref_t so;
1497
1498 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1499
1500 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1501 AUDIT_ARG(fd, uap->s);
1502
1503 if (uap->flags & MSG_SKIPCFIL) {
1504 error = EPERM;
1505 goto done;
1506 }
1507
1508 if (is_p_64bit_process) {
1509 msghdrp = (caddr_t)&msg64;
1510 size_of_msghdr = sizeof(msg64);
1511 } else {
1512 msghdrp = (caddr_t)&msg32;
1513 size_of_msghdr = sizeof(msg32);
1514 }
1515 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1516 if (error) {
1517 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1518 return error;
1519 }
1520
1521 if (is_p_64bit_process) {
1522 user_msg.msg_flags = msg64.msg_flags;
1523 user_msg.msg_controllen = msg64.msg_controllen;
1524 user_msg.msg_control = (user_addr_t)msg64.msg_control;
1525 user_msg.msg_iovlen = msg64.msg_iovlen;
1526 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1527 user_msg.msg_namelen = msg64.msg_namelen;
1528 user_msg.msg_name = (user_addr_t)msg64.msg_name;
1529 } else {
1530 user_msg.msg_flags = msg32.msg_flags;
1531 user_msg.msg_controllen = msg32.msg_controllen;
1532 user_msg.msg_control = msg32.msg_control;
1533 user_msg.msg_iovlen = msg32.msg_iovlen;
1534 user_msg.msg_iov = msg32.msg_iov;
1535 user_msg.msg_namelen = msg32.msg_namelen;
1536 user_msg.msg_name = msg32.msg_name;
1537 }
1538
1539 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1540 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1541 0, 0, 0, 0);
1542 return EMSGSIZE;
1543 }
1544
1545 /* allocate a uio large enough to hold the number of iovecs passed */
1546 auio = uio_create(user_msg.msg_iovlen, 0,
1547 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1548 UIO_WRITE);
1549 if (auio == NULL) {
1550 error = ENOBUFS;
1551 goto done;
1552 }
1553
1554 if (user_msg.msg_iovlen) {
1555 /*
1556 * get location of iovecs within the uio.
1557 * then copyin the iovecs from user space.
1558 */
1559 iovp = uio_iovsaddr(auio);
1560 if (iovp == NULL) {
1561 error = ENOBUFS;
1562 goto done;
1563 }
1564 error = copyin_user_iovec_array(user_msg.msg_iov,
1565 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1566 user_msg.msg_iovlen, iovp);
1567 if (error) {
1568 goto done;
1569 }
1570 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1571
1572 /* finish setup of uio_t */
1573 error = uio_calculateresid(auio);
1574 if (error) {
1575 goto done;
1576 }
1577 } else {
1578 user_msg.msg_iov = 0;
1579 }
1580
1581 /* msg_flags is ignored for send */
1582 user_msg.msg_flags = 0;
1583
1584 error = file_socket(uap->s, &so);
1585 if (error) {
1586 goto done;
1587 }
1588 if (so == NULL) {
1589 error = EBADF;
1590 } else {
1591 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1592 }
1593 file_drop(uap->s);
1594 done:
1595 if (auio != NULL) {
1596 uio_free(auio);
1597 }
1598 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1599
1600 return error;
1601 }
1602
#if DEBUG || DEVELOPMENT
/*
 * sendmsg_x_old
 *
 * Array-based sendmsg_x() implementation, only built on DEBUG and
 * DEVELOPMENT kernels.  The whole user msghdr_x array is copied in and
 * internalized up front, then every message is sent through sendit().
 * On success *retval is the number of messages sent.
 *
 * uap->cnt is clipped in place to somaxsendmsgx (at least 1).
 *
 * Returns:	0			Success
 *		EPERM			MSG_SKIPCFIL set in uap->flags
 *		EBADF			file_socket() produced no socket
 *		EINVAL			Bad count, iovec sizes or msg_flags
 *		ENOMEM			Allocation failure
 *	file_socket:???
 *	copyin:???
 *	internalize_user_msghdr_array:???
 *	sendit:???			[see sendit definition in this file]
 */
static int
sendmsg_x_old(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
{
	int error = 0;
	user_msghdr_x_ptr_t user_msg_x = NULL;
	uio_ref_ptr_t uiop = NULL;
	socket_ref_t so;
	u_int i;
	sockaddr_ref_t to = NULL;
	user_ssize_t len_before = 0, len_after;
	int need_drop = 0;
	size_t size_of_msghdr;
	void_ptr_t umsgp = NULL;
	u_int uiocnt = 0;

	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);

	size_of_msghdr = IS_64BIT_PROCESS(p) ?
	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);

	if (uap->flags & MSG_SKIPCFIL) {
		error = EPERM;
		goto out;
	}

	error = file_socket(uap->s, &so);
	if (error) {
		goto out;
	}
	need_drop = 1;
	if (so == NULL) {
		error = EBADF;
		goto out;
	}

	/*
	 * Input parameter range check
	 */
	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
		error = EINVAL;
		goto out;
	}
	/*
	 * Clip to max currently allowed
	 */
	if (uap->cnt > somaxsendmsgx) {
		uap->cnt = somaxsendmsgx > 0 ? somaxsendmsgx : 1;
	}

	user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
	    Z_WAITOK | Z_ZERO);
	if (user_msg_x == NULL) {
		DBG_PRINTF("%s user_msg_x alloc failed", __func__);
		error = ENOMEM;
		goto out;
	}
	uiop = kalloc_type(uio_ref_t, uap->cnt, Z_WAITOK | Z_ZERO);
	if (uiop == NULL) {
		DBG_PRINTF("%s uiop alloc failed", __func__);
		error = ENOMEM;
		goto out;
	}

	umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
	if (umsgp == NULL) {
		/* Name the buffer that actually failed to allocate */
		DBG_PRINTF("%s umsgp alloc failed", __func__);
		error = ENOMEM;
		goto out;
	}
	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
	if (error) {
		DBG_PRINTF("%s copyin() failed", __func__);
		goto out;
	}
	error = internalize_user_msghdr_array(umsgp,
	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
	    UIO_WRITE, uap->cnt, user_msg_x, uiop);
	if (error) {
		/* Name the routine that was actually called */
		DBG_PRINTF("%s internalize_user_msghdr_array() failed",
		    __func__);
		goto out;
	}
	/*
	 * Make sure the size of each message iovec and
	 * the aggregate size of all the iovec is valid
	 */
	if (uio_array_is_valid(uiop, uap->cnt) == false) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Sanity check on passed arguments
	 */
	for (i = 0; i < uap->cnt; i++) {
		struct user_msghdr_x *mp = user_msg_x + i;

		/*
		 * No flags on send message
		 */
		if (mp->msg_flags != 0) {
			error = EINVAL;
			goto out;
		}

#if CONFIG_MACF_SOCKET_SUBSET
		/*
		 * We check the state without holding the socket lock;
		 * if a race condition occurs, it would simply result
		 * in an extra call to the MAC check function.
		 *
		 * Note: `to' is never set in this function, so the
		 * following check is never true; it is effectively
		 * placeholder code.  Per-message addresses are checked
		 * by sendit() instead.
		 */
		if (to != NULL &&
		    !(so->so_state & SS_DEFUNCT) &&
		    (error = mac_socket_check_send(kauth_cred_get(), so, to))
		    != 0) {
			goto out;
		}
#endif /* MAC_SOCKET_SUBSET */
	}

	len_before = uio_array_resid(uiop, uap->cnt);

	/* Send the messages one by one, stopping at the first failure */
	for (i = 0; i < uap->cnt; i++) {
		struct user_msghdr_x *mp = user_msg_x + i;
		struct user_msghdr user_msg;
		uio_t auio = uiop[i];
		int32_t tmpval;

		user_msg.msg_flags = mp->msg_flags;
		user_msg.msg_controllen = mp->msg_controllen;
		user_msg.msg_control = mp->msg_control;
		user_msg.msg_iovlen = mp->msg_iovlen;
		user_msg.msg_iov = mp->msg_iov;
		user_msg.msg_namelen = mp->msg_namelen;
		user_msg.msg_name = mp->msg_name;

		error = sendit(p, so, &user_msg, auio, uap->flags,
		    &tmpval);
		if (error != 0) {
			break;
		}
		uiocnt += 1;
	}

	len_after = uio_array_resid(uiop, uap->cnt);

	VERIFY(len_after <= len_before);

	if (error != 0) {
		/* Some data went out before a transient failure: success */
		if (len_after != len_before && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK ||
		    error == ENOBUFS)) {
			error = 0;
		}
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
		    !(uap->flags & MSG_NOSIGNAL)) {
			psignal(p, SIGPIPE);
		}
	}
	if (error == 0) {
		externalize_user_msghdr_array(umsgp,
		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
		    UIO_WRITE, uiocnt, user_msg_x, uiop);

		*retval = (int)(uiocnt);
	}
out:
	if (need_drop) {
		file_drop(uap->s);
	}
	kfree_data(umsgp, uap->cnt * size_of_msghdr);
	if (uiop != NULL) {
		free_uio_array(uiop, uap->cnt);
		kfree_type(uio_ref_t, uap->cnt, uiop);
	}
	kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);

	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);

	return error;
}
#endif /* DEBUG || DEVELOPMENT */
1803
1804 static int
internalize_user_msg_x(struct user_msghdr * user_msg,uio_t * auiop,proc_ref_t p,void_ptr_t user_msghdr_x_src)1805 internalize_user_msg_x(struct user_msghdr *user_msg, uio_t *auiop, proc_ref_t p, void_ptr_t user_msghdr_x_src)
1806 {
1807 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1808 uio_t auio = *auiop;
1809 int error;
1810
1811 if (is_p_64bit_process) {
1812 struct user64_msghdr_x msghdrx64;
1813
1814 error = copyin((user_addr_t)user_msghdr_x_src,
1815 &msghdrx64, sizeof(msghdrx64));
1816 if (error != 0) {
1817 DBG_PRINTF("%s copyin() msghdrx64 failed %d",
1818 __func__, error);
1819 goto done;
1820 }
1821 user_msg->msg_name = msghdrx64.msg_name;
1822 user_msg->msg_namelen = msghdrx64.msg_namelen;
1823 user_msg->msg_iov = msghdrx64.msg_iov;
1824 user_msg->msg_iovlen = msghdrx64.msg_iovlen;
1825 user_msg->msg_control = msghdrx64.msg_control;
1826 user_msg->msg_controllen = msghdrx64.msg_controllen;
1827 } else {
1828 struct user32_msghdr_x msghdrx32;
1829
1830 error = copyin((user_addr_t)user_msghdr_x_src,
1831 &msghdrx32, sizeof(msghdrx32));
1832 if (error != 0) {
1833 DBG_PRINTF("%s copyin() msghdrx32 failed %d",
1834 __func__, error);
1835 goto done;
1836 }
1837 user_msg->msg_name = msghdrx32.msg_name;
1838 user_msg->msg_namelen = msghdrx32.msg_namelen;
1839 user_msg->msg_iov = msghdrx32.msg_iov;
1840 user_msg->msg_iovlen = msghdrx32.msg_iovlen;
1841 user_msg->msg_control = msghdrx32.msg_control;
1842 user_msg->msg_controllen = msghdrx32.msg_controllen;
1843 }
1844 /* msg_flags is ignored for send */
1845 user_msg->msg_flags = 0;
1846
1847 if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
1848 error = EMSGSIZE;
1849 DBG_PRINTF("%s bad msg_iovlen, error %d",
1850 __func__, error);
1851 goto done;
1852 }
1853 /*
1854 * Attempt to reuse the uio if large enough, otherwise we need
1855 * a new one
1856 */
1857 if (auio != NULL) {
1858 if (auio->uio_max_iovs >= user_msg->msg_iovlen) {
1859 uio_reset(auio, 0,
1860 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1861 UIO_WRITE);
1862 } else {
1863 uio_free(auio);
1864 auio = NULL;
1865 }
1866 }
1867 if (auio == NULL) {
1868 auio = uio_create(user_msg->msg_iovlen, 0,
1869 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1870 UIO_WRITE);
1871 if (auio == NULL) {
1872 error = ENOBUFS;
1873 DBG_PRINTF("%s uio_create() failed %d",
1874 __func__, error);
1875 goto done;
1876 }
1877 }
1878
1879 if (user_msg->msg_iovlen) {
1880 /*
1881 * get location of iovecs within the uio.
1882 * then copyin the iovecs from user space.
1883 */
1884 struct user_iovec *iovp = uio_iovsaddr(auio);
1885 if (iovp == NULL) {
1886 error = ENOBUFS;
1887 goto done;
1888 }
1889 error = copyin_user_iovec_array(user_msg->msg_iov,
1890 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1891 user_msg->msg_iovlen, iovp);
1892 if (error != 0) {
1893 goto done;
1894 }
1895 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
1896
1897 /* finish setup of uio_t */
1898 error = uio_calculateresid(auio);
1899 if (error) {
1900 goto done;
1901 }
1902 } else {
1903 user_msg->msg_iov = 0;
1904 }
1905
1906 done:
1907 *auiop = auio;
1908 return error;
1909 }
1910
1911 static int
mbuf_packet_from_uio(socket_ref_t so,mbuf_ref_ref_t mp,uio_t auio)1912 mbuf_packet_from_uio(socket_ref_t so, mbuf_ref_ref_t mp, uio_t auio)
1913 {
1914 int error = 0;
1915 uint16_t headroom = 0;
1916 size_t bytes_to_alloc;
1917 mbuf_ref_t top = NULL, m;
1918
1919 if (soreserveheadroom != 0) {
1920 headroom = so->so_pktheadroom;
1921 }
1922 bytes_to_alloc = headroom + uio_resid(auio);
1923
1924 error = mbuf_allocpacket(MBUF_WAITOK, bytes_to_alloc, NULL, &top);
1925 if (error != 0) {
1926 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: mbuf_allocpacket %zu error %d",
1927 bytes_to_alloc, error);
1928 goto done;
1929 }
1930
1931 if (headroom > 0 && headroom < mbuf_maxlen(top)) {
1932 top->m_data += headroom;
1933 }
1934
1935 for (m = top; m != NULL; m = m->m_next) {
1936 int bytes_to_copy = (int)uio_resid(auio);
1937 ssize_t mlen;
1938
1939 if ((m->m_flags & M_EXT)) {
1940 mlen = m->m_ext.ext_size -
1941 M_LEADINGSPACE(m);
1942 } else if ((m->m_flags & M_PKTHDR)) {
1943 mlen = MHLEN - M_LEADINGSPACE(m);
1944 m_add_crumb(m, PKT_CRUMB_SOSEND);
1945 } else {
1946 mlen = MLEN - M_LEADINGSPACE(m);
1947 }
1948 int len = imin((int)mlen, bytes_to_copy);
1949
1950 error = uiomove(mtod(m, caddr_t), (int)len, auio);
1951 if (error != 0) {
1952 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
1953 len, error);
1954 goto done;
1955 }
1956 m->m_len = len;
1957 top->m_pkthdr.len += len;
1958 }
1959
1960 done:
1961 if (error != 0) {
1962 m_freem(top);
1963 } else {
1964 *mp = top;
1965 }
1966 return error;
1967 }
1968
1969 static int
sendit_x(proc_ref_t p,socket_ref_t so,struct sendmsg_x_args * uap,u_int * retval)1970 sendit_x(proc_ref_t p, socket_ref_t so, struct sendmsg_x_args *uap, u_int *retval)
1971 {
1972 int error = 0;
1973 uio_t auio = NULL;
1974 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1975 void_ptr_t src;
1976 MBUFQ_HEAD() pktlist = {};
1977 size_t total_pkt_len = 0;
1978 u_int pkt_cnt = 0;
1979 int flags = uap->flags;
1980 mbuf_ref_t top;
1981
1982 MBUFQ_INIT(&pktlist);
1983
1984 *retval = 0;
1985
1986 /* We re-use the uio when possible */
1987 auio = uio_create(1, 0,
1988 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1989 UIO_WRITE);
1990 if (auio == NULL) {
1991 error = ENOBUFS;
1992 DBG_PRINTF("%s uio_create() failed %d",
1993 __func__, error);
1994 goto done;
1995 }
1996
1997 src = (void_ptr_t)uap->msgp;
1998
1999 /*
2000 * Create a list of packets
2001 */
2002 for (u_int i = 0; i < uap->cnt; i++) {
2003 struct user_msghdr user_msg = {};
2004 mbuf_ref_t m = NULL;
2005
2006 if (is_p_64bit_process) {
2007 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
2008 if (error != 0) {
2009 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
2010 goto done;
2011 }
2012 } else {
2013 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
2014 if (error != 0) {
2015 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
2016 goto done;
2017 }
2018 }
2019 /*
2020 * Stop on the first datagram that is too large
2021 */
2022 if (uio_resid(auio) > so->so_snd.sb_hiwat) {
2023 if (i == 0) {
2024 error = EMSGSIZE;
2025 goto done;
2026 }
2027 break;
2028 }
2029 /*
2030 * An mbuf packet has the control mbuf(s) followed by data
2031 * We allocate the mbufs in reverse order
2032 */
2033 error = mbuf_packet_from_uio(so, &m, auio);
2034 if (error != 0) {
2035 os_log(OS_LOG_DEFAULT, "sendit_x: mbuf_packet_from_uio error %d\n", error);
2036 goto done;
2037 }
2038 total_pkt_len += m->m_pkthdr.len;
2039
2040 if (user_msg.msg_control != USER_ADDR_NULL && user_msg.msg_controllen != 0) {
2041 mbuf_ref_t control = NULL;
2042
2043 error = sockargs(&control, user_msg.msg_control, user_msg.msg_controllen, MT_CONTROL);
2044 if (error != 0) {
2045 os_log(OS_LOG_DEFAULT, "sendit_x: sockargs error %d\n", error);
2046 goto done;
2047 }
2048 control->m_next = m;
2049 m = control;
2050 }
2051 MBUFQ_ENQUEUE(&pktlist, m);
2052
2053 pkt_cnt += 1;
2054 }
2055
2056 top = MBUFQ_FIRST(&pktlist);
2057 MBUFQ_INIT(&pktlist);
2058 error = sosend_list(so, top, total_pkt_len, &pkt_cnt, flags);
2059 if (error != 0) {
2060 os_log(OS_LOG_DEFAULT, "sendit_x: sosend_list error %d\n", error);
2061 goto done;
2062 }
2063 done:
2064 *retval = pkt_cnt;
2065
2066 if (auio != NULL) {
2067 uio_free(auio);
2068 }
2069 MBUFQ_DRAIN(&pktlist);
2070 return error;
2071 }
2072
2073 int
sendmsg_x(proc_ref_t p,struct sendmsg_x_args * uap,user_ssize_t * retval)2074 sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
2075 {
2076 void_ptr_t src;
2077 int error;
2078 uio_t auio = NULL;
2079 socket_ref_t so;
2080 u_int uiocnt = 0;
2081 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2082
2083 #if DEBUG || DEVELOPMENT
2084 if (sendmsg_x_mode == 2) {
2085 return sendmsg_x_old(p, uap, retval);
2086 }
2087 #endif /* DEBUG || DEVELOPMENT */
2088
2089 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2090 AUDIT_ARG(fd, uap->s);
2091
2092 if (uap->flags & MSG_SKIPCFIL) {
2093 error = EPERM;
2094 goto done_no_filedrop;
2095 }
2096
2097 error = file_socket(uap->s, &so);
2098 if (error) {
2099 goto done_no_filedrop;
2100 }
2101 if (so == NULL) {
2102 error = EBADF;
2103 goto done;
2104 }
2105
2106 /*
2107 * For an atomic datagram connected socket we can build the list of
2108 * mbuf packets with sosend_list()
2109 */
2110 if (so->so_type == SOCK_DGRAM && sosendallatonce(so) &&
2111 (so->so_state & SS_ISCONNECTED) && sendmsg_x_mode != 1) {
2112 error = sendit_x(p, so, uap, &uiocnt);
2113 if (error != 0) {
2114 DBG_PRINTF("%s sendit_x() failed %d",
2115 __func__, error);
2116 }
2117 goto done;
2118 }
2119
2120 src = (void_ptr_t)uap->msgp;
2121
2122 /* We re-use the uio when possible */
2123 auio = uio_create(1, 0,
2124 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2125 UIO_WRITE);
2126 if (auio == NULL) {
2127 error = ENOBUFS;
2128 DBG_PRINTF("%s uio_create() failed %d",
2129 __func__, error);
2130 goto done;
2131 }
2132
2133 for (u_int i = 0; i < uap->cnt; i++) {
2134 struct user_msghdr user_msg = {};
2135
2136 if (is_p_64bit_process) {
2137 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
2138 if (error != 0) {
2139 goto done;
2140 }
2141 } else {
2142 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
2143 if (error != 0) {
2144 goto done;
2145 }
2146 }
2147
2148 int32_t len = 0;
2149 error = sendit(p, so, &user_msg, auio, uap->flags, &len);
2150 if (error != 0) {
2151 break;
2152 }
2153 uiocnt += 1;
2154 }
2155 done:
2156 if (error != 0) {
2157 if (uiocnt != 0 && (error == ERESTART ||
2158 error == EINTR || error == EWOULDBLOCK ||
2159 error == ENOBUFS || error == EMSGSIZE)) {
2160 error = 0;
2161 }
2162 /* Generation of SIGPIPE can be controlled per socket */
2163 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
2164 !(uap->flags & MSG_NOSIGNAL)) {
2165 psignal(p, SIGPIPE);
2166 }
2167 }
2168 if (error == 0) {
2169 *retval = (int)(uiocnt);
2170 }
2171 file_drop(uap->s);
2172
2173 done_no_filedrop:
2174 if (auio != NULL) {
2175 uio_free(auio);
2176 }
2177 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2178
2179 return error;
2180 }
2181
2182
2183 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)2184 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
2185 {
2186 int error = 0;
2187 socklen_t sa_len = 0;
2188 ssize_t len;
2189
2190 len = *namelen;
2191 if (len <= 0 || fromsa == 0) {
2192 len = 0;
2193 } else {
2194 #ifndef MIN
2195 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2196 #endif
2197 sa_len = fromsa->sa_len;
2198 len = MIN((unsigned int)len, sa_len);
2199 error = copyout(fromsa, name, (unsigned)len);
2200 if (error) {
2201 goto out;
2202 }
2203 }
2204 *namelen = sa_len;
2205 out:
2206 return 0;
2207 }
2208
2209 static int
copyout_maddr(struct mbuf * m,user_addr_t name,socklen_t * namelen)2210 copyout_maddr(struct mbuf *m, user_addr_t name, socklen_t *namelen)
2211 {
2212 int error = 0;
2213 socklen_t sa_len = 0;
2214 ssize_t len;
2215
2216 len = *namelen;
2217 if (len <= 0 || m == NULL) {
2218 len = 0;
2219 } else {
2220 #ifndef MIN
2221 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2222 #endif
2223 struct sockaddr *fromsa = mtod(m, struct sockaddr *);
2224
2225 sa_len = fromsa->sa_len;
2226 len = MIN((unsigned int)len, sa_len);
2227 error = copyout(fromsa, name, (unsigned)len);
2228 if (error != 0) {
2229 goto out;
2230 }
2231 }
2232 *namelen = sa_len;
2233 out:
2234 return 0;
2235 }
2236
2237 static int
copyout_control(proc_ref_t p,mbuf_ref_t m,user_addr_t control,socklen_ref_t controllen,int_ref_t flags,socket_ref_t so)2238 copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
2239 socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
2240 {
2241 int error = 0;
2242 socklen_t len;
2243 user_addr_t ctlbuf;
2244 struct inpcb *inp = NULL;
2245 bool want_pktinfo = false;
2246 bool seen_pktinfo = false;
2247
2248 if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
2249 inp = sotoinpcb(so);
2250 want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
2251 }
2252
2253 len = *controllen;
2254 *controllen = 0;
2255 ctlbuf = control;
2256
2257 while (m && len > 0) {
2258 socklen_t tocopy;
2259 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
2260 socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
2261 socklen_t buflen = m->m_len;
2262
2263 while (buflen > 0 && len > 0) {
2264 /*
2265 * SCM_TIMESTAMP hack because struct timeval has a
2266 * different size for 32 bits and 64 bits processes
2267 */
2268 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
2269 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
2270 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
2271 socklen_t tmp_space;
2272 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
2273
2274 tmp_cp->cmsg_level = SOL_SOCKET;
2275 tmp_cp->cmsg_type = SCM_TIMESTAMP;
2276
2277 if (proc_is64bit(p)) {
2278 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
2279
2280 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
2281 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
2282
2283 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
2284 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
2285 } else {
2286 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
2287
2288 tv32->tv_sec = (user32_time_t)tv->tv_sec;
2289 tv32->tv_usec = tv->tv_usec;
2290
2291 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
2292 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
2293 }
2294 if (len >= tmp_space) {
2295 tocopy = tmp_space;
2296 } else {
2297 *flags |= MSG_CTRUNC;
2298 tocopy = len;
2299 }
2300 error = copyout(tmp_buffer, ctlbuf, tocopy);
2301 if (error) {
2302 goto out;
2303 }
2304 } else {
2305 /* If socket has flow tracking and socket did not request address, ignore it */
2306 if (SOFLOW_ENABLED(so) &&
2307 ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
2308 !(inp->inp_flags & INP_RECVDSTADDR)) ||
2309 (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
2310 !(inp->inp_flags & IN6P_PKTINFO)))) {
2311 tocopy = 0;
2312 } else {
2313 if (cp_size > buflen) {
2314 panic("cp_size > buflen, something wrong with alignment!");
2315 }
2316 if (len >= cp_size) {
2317 tocopy = cp_size;
2318 } else {
2319 *flags |= MSG_CTRUNC;
2320 tocopy = len;
2321 }
2322 error = copyout((caddr_t) cp, ctlbuf, tocopy);
2323 if (error) {
2324 goto out;
2325 }
2326 if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
2327 (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
2328 seen_pktinfo = true;
2329 }
2330 }
2331 }
2332
2333
2334 ctlbuf += tocopy;
2335 len -= tocopy;
2336
2337 buflen -= cp_size;
2338 cp = (struct cmsghdr *)(void *)
2339 ((unsigned char *) cp + cp_size);
2340 cp_size = CMSG_ALIGN(cp->cmsg_len);
2341 }
2342
2343 m = m->m_next;
2344 }
2345 *controllen = (socklen_t)(ctlbuf - control);
2346 out:
2347 if (want_pktinfo && !seen_pktinfo) {
2348 missingpktinfo += 1;
2349 #if (DEBUG || DEVELOPMENT)
2350 char pname[MAXCOMLEN];
2351 char local[MAX_IPv6_STR_LEN + 6];
2352 char remote[MAX_IPv6_STR_LEN + 6];
2353
2354 proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
2355 if (inp->inp_vflag & INP_IPV6) {
2356 inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
2357 inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
2358 } else {
2359 inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
2360 inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
2361 }
2362
2363 os_log(OS_LOG_DEFAULT,
2364 "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
2365 local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
2366 pname, so->last_pid, error);
2367 #endif /* (DEBUG || DEVELOPMENT) */
2368 }
2369 return error;
2370 }
2371
2372 /*
2373 * Returns: 0 Success
2374 * ENOTSOCK
2375 * EINVAL
2376 * EBADF
2377 * EACCES Mandatory Access Control failure
2378 * copyout:EFAULT
2379 * fp_lookup:EBADF
2380 * <pru_soreceive>:ENOBUFS
2381 * <pru_soreceive>:ENOTCONN
2382 * <pru_soreceive>:EWOULDBLOCK
2383 * <pru_soreceive>:EFAULT
2384 * <pru_soreceive>:EINTR
2385 * <pru_soreceive>:EBADF
2386 * <pru_soreceive>:EINVAL
2387 * <pru_soreceive>:EMSGSIZE
2388 * <pru_soreceive>:???
2389 *
2390 * Notes: Additional return values from calls through <pru_soreceive>
2391 * depend on protocols other than TCP or AF_UNIX, which are
2392 * documented above.
2393 */
2394 static int
recvit(proc_ref_t p,int s,user_msghdr_ref_t mp,uio_t uiop,user_addr_t namelenp,int32_ref_t retval)2395 recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
2396 user_addr_t namelenp, int32_ref_t retval)
2397 {
2398 ssize_t len;
2399 int error;
2400 mbuf_ref_t control = 0;
2401 socket_ref_t so;
2402 sockaddr_ref_t fromsa = 0;
2403 fileproc_ref_t fp;
2404
2405 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2406 if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
2407 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2408 return error;
2409 }
2410 so = (struct socket *)fp_get_data(fp);
2411
2412 #if CONFIG_MACF_SOCKET_SUBSET
2413 /*
2414 * We check the state without holding the socket lock;
2415 * if a race condition occurs, it would simply result
2416 * in an extra call to the MAC check function.
2417 */
2418 if (!(so->so_state & SS_DEFUNCT) &&
2419 !(so->so_state & SS_ISCONNECTED) &&
2420 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2421 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2422 goto out1;
2423 }
2424 #endif /* MAC_SOCKET_SUBSET */
2425 if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
2426 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
2427 error = EINVAL;
2428 goto out1;
2429 }
2430
2431 len = uio_resid(uiop);
2432 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2433 NULL, mp->msg_control ? &control : NULL,
2434 &mp->msg_flags);
2435 if (fromsa) {
2436 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2437 fromsa);
2438 }
2439 if (error) {
2440 if (uio_resid(uiop) != len && (error == ERESTART ||
2441 error == EINTR || error == EWOULDBLOCK)) {
2442 error = 0;
2443 }
2444 }
2445 if (error) {
2446 goto out;
2447 }
2448
2449 *retval = (int32_t)(len - uio_resid(uiop));
2450
2451 if (mp->msg_name) {
2452 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
2453 if (error) {
2454 goto out;
2455 }
2456 /* return the actual, untruncated address length */
2457 if (namelenp &&
2458 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2459 sizeof(int)))) {
2460 goto out;
2461 }
2462 }
2463
2464 if (mp->msg_control) {
2465 error = copyout_control(p, control, mp->msg_control,
2466 &mp->msg_controllen, &mp->msg_flags, so);
2467 }
2468 out:
2469 free_sockaddr(fromsa);
2470 if (control) {
2471 m_freem(control);
2472 }
2473 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2474 out1:
2475 fp_drop(p, s, fp, 0);
2476 return error;
2477 }
2478
2479 /*
2480 * Returns: 0 Success
2481 * ENOMEM
2482 * copyin:EFAULT
2483 * recvit:???
2484 * read:??? [4056224: applicable for pipes]
2485 *
2486 * Notes: The read entry point is only called as part of support for
2487 * binary backward compatability; new code should use read
2488 * instead of recv or recvfrom when attempting to read data
2489 * from pipes.
2490 *
2491 * For full documentation of the return codes from recvit, see
2492 * the block header for the recvit function.
2493 */
2494 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2495 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2496 {
2497 __pthread_testcancel(1);
2498 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2499 retval);
2500 }
2501
2502 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2503 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2504 int32_ref_t retval)
2505 {
2506 struct user_msghdr msg;
2507 int error;
2508 uio_t auio = NULL;
2509
2510 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2511 AUDIT_ARG(fd, uap->s);
2512
2513 if (uap->fromlenaddr) {
2514 error = copyin(uap->fromlenaddr,
2515 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2516 if (error) {
2517 return error;
2518 }
2519 } else {
2520 msg.msg_namelen = 0;
2521 }
2522 msg.msg_name = uap->from;
2523 auio = uio_create(1, 0,
2524 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2525 UIO_READ);
2526 if (auio == NULL) {
2527 return ENOMEM;
2528 }
2529
2530 uio_addiov(auio, uap->buf, uap->len);
2531 /* no need to set up msg_iov. recvit uses uio_t we send it */
2532 msg.msg_iov = 0;
2533 msg.msg_iovlen = 0;
2534 msg.msg_control = 0;
2535 msg.msg_controllen = 0;
2536 msg.msg_flags = uap->flags;
2537 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2538 if (auio != NULL) {
2539 uio_free(auio);
2540 }
2541
2542 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2543
2544 return error;
2545 }
2546
2547 /*
2548 * Returns: 0 Success
2549 * EMSGSIZE
2550 * ENOMEM
2551 * copyin:EFAULT
2552 * copyout:EFAULT
2553 * recvit:???
2554 *
2555 * Notes: For full documentation of the return codes from recvit, see
2556 * the block header for the recvit function.
2557 */
2558 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2559 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2560 {
2561 __pthread_testcancel(1);
2562 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2563 retval);
2564 }
2565
2566 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2567 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2568 int32_ref_t retval)
2569 {
2570 struct user32_msghdr msg32;
2571 struct user64_msghdr msg64;
2572 struct user_msghdr user_msg;
2573 caddr_t msghdrp;
2574 int size_of_msghdr;
2575 user_addr_t uiov;
2576 int error;
2577 uio_t auio = NULL;
2578 struct user_iovec *iovp;
2579
2580 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2581
2582 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2583 AUDIT_ARG(fd, uap->s);
2584 if (is_p_64bit_process) {
2585 msghdrp = (caddr_t)&msg64;
2586 size_of_msghdr = sizeof(msg64);
2587 } else {
2588 msghdrp = (caddr_t)&msg32;
2589 size_of_msghdr = sizeof(msg32);
2590 }
2591 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2592 if (error) {
2593 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2594 return error;
2595 }
2596
2597 /* only need to copy if user process is not 64-bit */
2598 if (is_p_64bit_process) {
2599 user_msg.msg_flags = msg64.msg_flags;
2600 user_msg.msg_controllen = msg64.msg_controllen;
2601 user_msg.msg_control = (user_addr_t)msg64.msg_control;
2602 user_msg.msg_iovlen = msg64.msg_iovlen;
2603 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2604 user_msg.msg_namelen = msg64.msg_namelen;
2605 user_msg.msg_name = (user_addr_t)msg64.msg_name;
2606 } else {
2607 user_msg.msg_flags = msg32.msg_flags;
2608 user_msg.msg_controllen = msg32.msg_controllen;
2609 user_msg.msg_control = msg32.msg_control;
2610 user_msg.msg_iovlen = msg32.msg_iovlen;
2611 user_msg.msg_iov = msg32.msg_iov;
2612 user_msg.msg_namelen = msg32.msg_namelen;
2613 user_msg.msg_name = msg32.msg_name;
2614 }
2615
2616 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2617 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2618 0, 0, 0, 0);
2619 return EMSGSIZE;
2620 }
2621
2622 user_msg.msg_flags = uap->flags;
2623
2624 /* allocate a uio large enough to hold the number of iovecs passed */
2625 auio = uio_create(user_msg.msg_iovlen, 0,
2626 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2627 UIO_READ);
2628 if (auio == NULL) {
2629 error = ENOMEM;
2630 goto done;
2631 }
2632
2633 /*
2634 * get location of iovecs within the uio. then copyin the iovecs from
2635 * user space.
2636 */
2637 iovp = uio_iovsaddr(auio);
2638 if (iovp == NULL) {
2639 error = ENOMEM;
2640 goto done;
2641 }
2642 uiov = user_msg.msg_iov;
2643 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2644 error = copyin_user_iovec_array(uiov,
2645 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2646 user_msg.msg_iovlen, iovp);
2647 if (error) {
2648 goto done;
2649 }
2650
2651 /* finish setup of uio_t */
2652 error = uio_calculateresid(auio);
2653 if (error) {
2654 goto done;
2655 }
2656
2657 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2658 if (!error) {
2659 user_msg.msg_iov = uiov;
2660 if (is_p_64bit_process) {
2661 msg64.msg_flags = user_msg.msg_flags;
2662 msg64.msg_controllen = user_msg.msg_controllen;
2663 msg64.msg_control = user_msg.msg_control;
2664 msg64.msg_iovlen = user_msg.msg_iovlen;
2665 msg64.msg_iov = user_msg.msg_iov;
2666 msg64.msg_namelen = user_msg.msg_namelen;
2667 msg64.msg_name = user_msg.msg_name;
2668 } else {
2669 msg32.msg_flags = user_msg.msg_flags;
2670 msg32.msg_controllen = user_msg.msg_controllen;
2671 msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2672 msg32.msg_iovlen = user_msg.msg_iovlen;
2673 msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2674 msg32.msg_namelen = user_msg.msg_namelen;
2675 msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2676 }
2677 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2678 }
2679 done:
2680 if (auio != NULL) {
2681 uio_free(auio);
2682 }
2683 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2684 return error;
2685 }
2686
2687 __attribute__((noinline))
2688 static int
recvmsg_x_array(proc_ref_t p,socket_ref_t so,struct recvmsg_x_args * uap,user_ssize_t * retval)2689 recvmsg_x_array(proc_ref_t p, socket_ref_t so, struct recvmsg_x_args *uap, user_ssize_t *retval)
2690 {
2691 int error = EOPNOTSUPP;
2692 user_msghdr_x_ptr_t user_msg_x = NULL;
2693 recv_msg_elem_ptr_t recv_msg_array = NULL;
2694 user_ssize_t len_before = 0, len_after;
2695 size_t size_of_msghdr;
2696 void_ptr_t umsgp = NULL;
2697 u_int i;
2698 u_int uiocnt;
2699 int flags = uap->flags;
2700
2701 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2702
2703 size_of_msghdr = is_p_64bit_process ?
2704 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2705
2706 /*
2707 * Support only a subset of message flags
2708 */
2709 if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | MSG_NBIO)) {
2710 return EOPNOTSUPP;
2711 }
2712 /*
2713 * Input parameter range check
2714 */
2715 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2716 error = EINVAL;
2717 goto out;
2718 }
2719 if (uap->cnt > somaxrecvmsgx) {
2720 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2721 }
2722
2723 user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
2724 Z_WAITOK | Z_ZERO);
2725 if (user_msg_x == NULL) {
2726 DBG_PRINTF("%s user_msg_x alloc failed", __func__);
2727 error = ENOMEM;
2728 goto out;
2729 }
2730 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2731 if (recv_msg_array == NULL) {
2732 DBG_PRINTF("%s alloc_recv_msg_array() failed", __func__);
2733 error = ENOMEM;
2734 goto out;
2735 }
2736
2737 umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2738 if (umsgp == NULL) {
2739 DBG_PRINTF("%s umsgp alloc failed", __func__);
2740 error = ENOMEM;
2741 goto out;
2742 }
2743 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2744 if (error) {
2745 DBG_PRINTF("%s copyin() failed", __func__);
2746 goto out;
2747 }
2748 error = internalize_recv_msghdr_array(umsgp,
2749 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2750 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2751 if (error) {
2752 DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
2753 goto out;
2754 }
2755 /*
2756 * Make sure the size of each message iovec and
2757 * the aggregate size of all the iovec is valid
2758 */
2759 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2760 error = EINVAL;
2761 goto out;
2762 }
2763 /*
2764 * Sanity check on passed arguments
2765 */
2766 for (i = 0; i < uap->cnt; i++) {
2767 struct user_msghdr_x *mp = user_msg_x + i;
2768
2769 if (mp->msg_flags != 0) {
2770 error = EINVAL;
2771 goto out;
2772 }
2773 }
2774 #if CONFIG_MACF_SOCKET_SUBSET
2775 /*
2776 * We check the state without holding the socket lock;
2777 * if a race condition occurs, it would simply result
2778 * in an extra call to the MAC check function.
2779 */
2780 if (!(so->so_state & SS_DEFUNCT) &&
2781 !(so->so_state & SS_ISCONNECTED) &&
2782 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2783 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2784 goto out;
2785 }
2786 #endif /* MAC_SOCKET_SUBSET */
2787
2788 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2789
2790 for (i = 0; i < uap->cnt; i++) {
2791 struct recv_msg_elem *recv_msg_elem;
2792 uio_t auio;
2793 sockaddr_ref_ref_t psa;
2794 struct mbuf **controlp;
2795
2796 recv_msg_elem = recv_msg_array + i;
2797 auio = recv_msg_elem->uio;
2798
2799 /*
2800 * Do not block if we got at least one packet
2801 */
2802 if (i > 0) {
2803 flags |= MSG_DONTWAIT;
2804 }
2805
2806 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2807 &recv_msg_elem->psa : NULL;
2808 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2809 &recv_msg_elem->controlp : NULL;
2810
2811 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2812 auio, NULL, controlp, &flags);
2813 if (error) {
2814 break;
2815 }
2816 /*
2817 * We have some data
2818 */
2819 recv_msg_elem->which |= SOCK_MSG_DATA;
2820 /*
2821 * Set the messages flags for this packet
2822 */
2823 flags &= ~MSG_DONTWAIT;
2824 recv_msg_elem->flags = flags;
2825 /*
2826 * Stop on partial copy
2827 */
2828 if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2829 break;
2830 }
2831 }
2832
2833 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2834
2835 if (error) {
2836 if (len_after != len_before && (error == ERESTART ||
2837 error == EINTR || error == EWOULDBLOCK)) {
2838 error = 0;
2839 } else {
2840 goto out;
2841 }
2842 }
2843
2844 uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2845 uap->cnt, user_msg_x, recv_msg_array, &error);
2846 if (error != 0) {
2847 goto out;
2848 }
2849
2850 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2851 if (error) {
2852 DBG_PRINTF("%s copyout() failed", __func__);
2853 goto out;
2854 }
2855 *retval = (int)(uiocnt);
2856
2857 out:
2858 kfree_data(umsgp, uap->cnt * size_of_msghdr);
2859 free_recv_msg_array(recv_msg_array, uap->cnt);
2860 kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);
2861
2862 return error;
2863 }
2864
2865 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2866 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2867 {
2868 int error = EOPNOTSUPP;
2869 socket_ref_t so;
2870 size_t size_of_msghdrx;
2871 caddr_t msghdrxp;
2872 struct user32_msghdr_x msghdrx32 = {};
2873 struct user64_msghdr_x msghdrx64 = {};
2874 int spacetype;
2875 u_int i;
2876 uio_t auio = NULL;
2877 caddr_t src;
2878 int flags;
2879 struct mbuf *pkt_list = NULL, *m;
2880 struct mbuf *addr_list = NULL, *m_addr;
2881 struct mbuf *ctl_list = NULL, *control;
2882 u_int pktcnt;
2883
2884 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2885
2886 error = file_socket(uap->s, &so);
2887 if (error) {
2888 goto done_no_filedrop;
2889 }
2890 if (so == NULL) {
2891 error = EBADF;
2892 goto done;
2893 }
2894
2895 #if CONFIG_MACF_SOCKET_SUBSET
2896 /*
2897 * We check the state without holding the socket lock;
2898 * if a race condition occurs, it would simply result
2899 * in an extra call to the MAC check function.
2900 */
2901 if (!(so->so_state & SS_DEFUNCT) &&
2902 !(so->so_state & SS_ISCONNECTED) &&
2903 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2904 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2905 goto done;
2906 }
2907 #endif /* MAC_SOCKET_SUBSET */
2908
2909 /*
2910 * With soreceive_m_list, all packets must be uniform, with address and
2911 * control as they are returned in parallel lists and it's only guaranteed
2912 * when pru_send_list is supported
2913 */
2914 if (do_recvmsg_x_donttrunc != 0 || (so->so_options & SO_DONTTRUNC)) {
2915 error = recvmsg_x_array(p, so, uap, retval);
2916 goto done;
2917 }
2918
2919 /*
2920 * Input parameter range check
2921 */
2922 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2923 error = EINVAL;
2924 goto done;
2925 }
2926 if (uap->cnt > somaxrecvmsgx) {
2927 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2928 }
2929
2930 if (IS_64BIT_PROCESS(p)) {
2931 msghdrxp = (caddr_t)&msghdrx64;
2932 size_of_msghdrx = sizeof(struct user64_msghdr_x);
2933 spacetype = UIO_USERSPACE64;
2934 } else {
2935 msghdrxp = (caddr_t)&msghdrx32;
2936 size_of_msghdrx = sizeof(struct user32_msghdr_x);
2937 spacetype = UIO_USERSPACE32;
2938 }
2939 src = (caddr_t)uap->msgp;
2940
2941 flags = uap->flags;
2942
2943 /*
2944 * Only allow MSG_DONTWAIT
2945 */
2946 if ((flags & ~(MSG_DONTWAIT | MSG_NBIO)) != 0) {
2947 error = EINVAL;
2948 goto done;
2949 }
2950
2951 /*
2952 * Receive list of packet in a single call
2953 */
2954 pktcnt = uap->cnt;
2955 error = soreceive_m_list(so, &pktcnt, &addr_list, &pkt_list, &ctl_list,
2956 &flags);
2957 if (error != 0) {
2958 if (pktcnt != 0 && (error == ERESTART ||
2959 error == EINTR || error == EWOULDBLOCK)) {
2960 error = 0;
2961 } else {
2962 goto done;
2963 }
2964 }
2965
2966 m_addr = addr_list;
2967 m = pkt_list;
2968 control = ctl_list;
2969
2970 for (i = 0; i < pktcnt; i++) {
2971 struct user_msghdr user_msg;
2972 ssize_t len;
2973 struct user_iovec *iovp;
2974 struct mbuf *n;
2975
2976 if (m->m_type != MT_OOBDATA && m->m_type != MT_DATA &&
2977 m->m_type != MT_HEADER) {
2978 panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
2979 }
2980
2981 error = copyin((user_addr_t)(src + i * size_of_msghdrx),
2982 msghdrxp, size_of_msghdrx);
2983 if (error) {
2984 DBG_PRINTF("%s copyin() msghdrx failed %d\n",
2985 __func__, error);
2986 goto done;
2987 }
2988 if (spacetype == UIO_USERSPACE64) {
2989 user_msg.msg_name = msghdrx64.msg_name;
2990 user_msg.msg_namelen = msghdrx64.msg_namelen;
2991 user_msg.msg_iov = msghdrx64.msg_iov;
2992 user_msg.msg_iovlen = msghdrx64.msg_iovlen;
2993 user_msg.msg_control = msghdrx64.msg_control;
2994 user_msg.msg_controllen = msghdrx64.msg_controllen;
2995 } else {
2996 user_msg.msg_name = msghdrx32.msg_name;
2997 user_msg.msg_namelen = msghdrx32.msg_namelen;
2998 user_msg.msg_iov = msghdrx32.msg_iov;
2999 user_msg.msg_iovlen = msghdrx32.msg_iovlen;
3000 user_msg.msg_control = msghdrx32.msg_control;
3001 user_msg.msg_controllen = msghdrx32.msg_controllen;
3002 }
3003 user_msg.msg_flags = 0;
3004 if (user_msg.msg_iovlen <= 0 ||
3005 user_msg.msg_iovlen > UIO_MAXIOV) {
3006 error = EMSGSIZE;
3007 DBG_PRINTF("%s bad msg_iovlen, error %d\n",
3008 __func__, error);
3009 goto done;
3010 }
3011 /*
3012 * Attempt to reuse the uio if large enough, otherwise we need
3013 * a new one
3014 */
3015 if (auio != NULL) {
3016 if (auio->uio_max_iovs <= user_msg.msg_iovlen) {
3017 uio_reset(auio, 0, spacetype, UIO_READ);
3018 } else {
3019 uio_free(auio);
3020 auio = NULL;
3021 }
3022 }
3023 if (auio == NULL) {
3024 auio = uio_create(user_msg.msg_iovlen, 0, spacetype,
3025 UIO_READ);
3026 if (auio == NULL) {
3027 error = ENOBUFS;
3028 DBG_PRINTF("%s uio_create() failed %d\n",
3029 __func__, error);
3030 goto done;
3031 }
3032 }
3033 /*
3034 * get location of iovecs within the uio then copy the iovecs
3035 * from user space.
3036 */
3037 iovp = uio_iovsaddr(auio);
3038 if (iovp == NULL) {
3039 error = ENOMEM;
3040 DBG_PRINTF("%s uio_iovsaddr() failed %d\n",
3041 __func__, error);
3042 goto done;
3043 }
3044 error = copyin_user_iovec_array(user_msg.msg_iov,
3045 spacetype, user_msg.msg_iovlen, iovp);
3046 if (error != 0) {
3047 DBG_PRINTF("%s copyin_user_iovec_array() failed %d\n",
3048 __func__, error);
3049 goto done;
3050 }
3051 error = uio_calculateresid(auio);
3052 if (error != 0) {
3053 DBG_PRINTF("%s uio_calculateresid() failed %d\n",
3054 __func__, error);
3055 goto done;
3056 }
3057 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
3058
3059 len = uio_resid(auio);
3060 for (n = m; n != NULL; n = n->m_next) {
3061 user_ssize_t resid = uio_resid(auio);
3062 if (resid < n->m_len) {
3063 error = uiomove(mtod(n, caddr_t), (int)n->m_len, auio);
3064 if (error != 0) {
3065 DBG_PRINTF("%s uiomove() failed\n",
3066 __func__);
3067 goto done;
3068 }
3069 flags |= MSG_TRUNC;
3070 break;
3071 }
3072
3073 error = uiomove(mtod(n, caddr_t), (int)n->m_len, auio);
3074 if (error != 0) {
3075 DBG_PRINTF("%s uiomove() failed\n",
3076 __func__);
3077 goto done;
3078 }
3079 }
3080 len -= uio_resid(auio);
3081
3082 if (user_msg.msg_name != 0 && user_msg.msg_namelen != 0) {
3083 error = copyout_maddr(m_addr, user_msg.msg_name,
3084 &user_msg.msg_namelen);
3085 if (error) {
3086 DBG_PRINTF("%s copyout_maddr() failed\n",
3087 __func__);
3088 goto done;
3089 }
3090 }
3091 if (user_msg.msg_control != 0 && user_msg.msg_controllen != 0) {
3092 error = copyout_control(p, control,
3093 user_msg.msg_control, &user_msg.msg_controllen,
3094 &user_msg.msg_flags, so);
3095 if (error) {
3096 DBG_PRINTF("%s copyout_control() failed\n",
3097 __func__);
3098 goto done;
3099 }
3100 }
3101 /*
3102 * Note: the original msg_iovlen and msg_iov do not change
3103 */
3104 if (spacetype == UIO_USERSPACE64) {
3105 msghdrx64.msg_flags = user_msg.msg_flags;
3106 msghdrx64.msg_controllen = user_msg.msg_controllen;
3107 msghdrx64.msg_control = user_msg.msg_control;
3108 msghdrx64.msg_namelen = user_msg.msg_namelen;
3109 msghdrx64.msg_name = user_msg.msg_name;
3110 msghdrx64.msg_datalen = len;
3111 } else {
3112 msghdrx32.msg_flags = user_msg.msg_flags;
3113 msghdrx32.msg_controllen = user_msg.msg_controllen;
3114 msghdrx32.msg_control = (user32_addr_t) user_msg.msg_control;
3115 msghdrx32.msg_name = user_msg.msg_namelen;
3116 msghdrx32.msg_name = (user32_addr_t) user_msg.msg_name;
3117 msghdrx32.msg_datalen = (user32_size_t) len;
3118 }
3119 error = copyout(msghdrxp,
3120 (user_addr_t)(src + i * size_of_msghdrx),
3121 size_of_msghdrx);
3122 if (error) {
3123 DBG_PRINTF("%s copyout() msghdrx failed\n", __func__);
3124 goto done;
3125 }
3126
3127 m = m->m_nextpkt;
3128 if (control != NULL) {
3129 control = control->m_nextpkt;
3130 }
3131 if (m_addr != NULL) {
3132 m_addr = m_addr->m_nextpkt;
3133 }
3134 }
3135
3136 uap->flags = flags;
3137
3138 *retval = (int)i;
3139 done:
3140 file_drop(uap->s);
3141
3142 done_no_filedrop:
3143 if (pkt_list != NULL) {
3144 m_freem_list(pkt_list);
3145 }
3146 if (addr_list != NULL) {
3147 m_freem_list(addr_list);
3148 }
3149 if (ctl_list != NULL) {
3150 m_freem_list(ctl_list);
3151 }
3152 if (auio != NULL) {
3153 uio_free(auio);
3154 }
3155
3156 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3157
3158 return error;
3159 }
3160
3161 /*
3162 * Returns: 0 Success
3163 * EBADF
3164 * file_socket:ENOTSOCK
3165 * file_socket:EBADF
3166 * soshutdown:EINVAL
3167 * soshutdown:ENOTCONN
3168 * soshutdown:EADDRNOTAVAIL[TCP]
3169 * soshutdown:ENOBUFS[TCP]
3170 * soshutdown:EMSGSIZE[TCP]
3171 * soshutdown:EHOSTUNREACH[TCP]
3172 * soshutdown:ENETUNREACH[TCP]
3173 * soshutdown:ENETDOWN[TCP]
3174 * soshutdown:ENOMEM[TCP]
3175 * soshutdown:EACCES[TCP]
3176 * soshutdown:EMSGSIZE[TCP]
3177 * soshutdown:ENOBUFS[TCP]
3178 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
3179 * soshutdown:??? [other protocol families]
3180 */
3181 /* ARGSUSED */
3182 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)3183 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
3184 __unused int32_ref_t retval)
3185 {
3186 socket_ref_t so;
3187 int error;
3188
3189 AUDIT_ARG(fd, uap->s);
3190 error = file_socket(uap->s, &so);
3191 if (error) {
3192 return error;
3193 }
3194 if (so == NULL) {
3195 error = EBADF;
3196 goto out;
3197 }
3198 error = soshutdown((struct socket *)so, uap->how);
3199 out:
3200 file_drop(uap->s);
3201 return error;
3202 }
3203
3204 /*
3205 * Returns: 0 Success
3206 * EFAULT
3207 * EINVAL
3208 * EACCES Mandatory Access Control failure
3209 * file_socket:ENOTSOCK
3210 * file_socket:EBADF
3211 * sosetopt:EINVAL
3212 * sosetopt:ENOPROTOOPT
3213 * sosetopt:ENOBUFS
3214 * sosetopt:EDOM
3215 * sosetopt:EFAULT
3216 * sosetopt:EOPNOTSUPP[AF_UNIX]
3217 * sosetopt:???
3218 */
3219 /* ARGSUSED */
3220 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)3221 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
3222 __unused int32_ref_t retval)
3223 {
3224 socket_ref_t so;
3225 struct sockopt sopt;
3226 int error;
3227
3228 AUDIT_ARG(fd, uap->s);
3229 if (uap->val == 0 && uap->valsize != 0) {
3230 return EFAULT;
3231 }
3232 /* No bounds checking on size (it's unsigned) */
3233
3234 error = file_socket(uap->s, &so);
3235 if (error) {
3236 return error;
3237 }
3238
3239 sopt.sopt_dir = SOPT_SET;
3240 sopt.sopt_level = uap->level;
3241 sopt.sopt_name = uap->name;
3242 sopt.sopt_val = uap->val;
3243 sopt.sopt_valsize = uap->valsize;
3244 sopt.sopt_p = p;
3245
3246 if (so == NULL) {
3247 error = EINVAL;
3248 goto out;
3249 }
3250 #if CONFIG_MACF_SOCKET_SUBSET
3251 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
3252 &sopt)) != 0) {
3253 goto out;
3254 }
3255 #endif /* MAC_SOCKET_SUBSET */
3256 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
3257 out:
3258 file_drop(uap->s);
3259 return error;
3260 }
3261
3262
3263
3264 /*
3265 * Returns: 0 Success
3266 * EINVAL
3267 * EBADF
3268 * EACCES Mandatory Access Control failure
3269 * copyin:EFAULT
3270 * copyout:EFAULT
3271 * file_socket:ENOTSOCK
3272 * file_socket:EBADF
3273 * sogetopt:???
3274 */
3275 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)3276 getsockopt(proc_ref_t p, struct getsockopt_args *uap,
3277 __unused int32_ref_t retval)
3278 {
3279 int error;
3280 socklen_t valsize;
3281 struct sockopt sopt;
3282 socket_ref_t so;
3283
3284 error = file_socket(uap->s, &so);
3285 if (error) {
3286 return error;
3287 }
3288 if (uap->val) {
3289 error = copyin(uap->avalsize, (caddr_t)&valsize,
3290 sizeof(valsize));
3291 if (error) {
3292 goto out;
3293 }
3294 /* No bounds checking on size (it's unsigned) */
3295 } else {
3296 valsize = 0;
3297 }
3298 sopt.sopt_dir = SOPT_GET;
3299 sopt.sopt_level = uap->level;
3300 sopt.sopt_name = uap->name;
3301 sopt.sopt_val = uap->val;
3302 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
3303 sopt.sopt_p = p;
3304
3305 if (so == NULL) {
3306 error = EBADF;
3307 goto out;
3308 }
3309 #if CONFIG_MACF_SOCKET_SUBSET
3310 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
3311 &sopt)) != 0) {
3312 goto out;
3313 }
3314 #endif /* MAC_SOCKET_SUBSET */
3315 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
3316 if (error == 0) {
3317 valsize = (socklen_t)sopt.sopt_valsize;
3318 error = copyout((caddr_t)&valsize, uap->avalsize,
3319 sizeof(valsize));
3320 }
3321 out:
3322 file_drop(uap->s);
3323 return error;
3324 }
3325
3326
3327 /*
3328 * Get socket name.
3329 *
3330 * Returns: 0 Success
3331 * EBADF
3332 * file_socket:ENOTSOCK
3333 * file_socket:EBADF
3334 * copyin:EFAULT
3335 * copyout:EFAULT
3336 * <pru_sockaddr>:ENOBUFS[TCP]
3337 * <pru_sockaddr>:ECONNRESET[TCP]
3338 * <pru_sockaddr>:EINVAL[AF_UNIX]
3339 * <sf_getsockname>:???
3340 */
3341 /* ARGSUSED */
3342 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)3343 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
3344 __unused int32_ref_t retval)
3345 {
3346 socket_ref_t so;
3347 sockaddr_ref_t sa;
3348 socklen_t len;
3349 socklen_t sa_len;
3350 int error;
3351
3352 error = file_socket(uap->fdes, &so);
3353 if (error) {
3354 return error;
3355 }
3356 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3357 if (error) {
3358 goto out;
3359 }
3360 if (so == NULL) {
3361 error = EBADF;
3362 goto out;
3363 }
3364 sa = 0;
3365 socket_lock(so, 1);
3366 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
3367 if (error == 0) {
3368 error = sflt_getsockname(so, &sa);
3369 if (error == EJUSTRETURN) {
3370 error = 0;
3371 }
3372 }
3373 socket_unlock(so, 1);
3374 if (error) {
3375 goto bad;
3376 }
3377 if (sa == 0) {
3378 len = 0;
3379 goto gotnothing;
3380 }
3381
3382 sa_len = sa->sa_len;
3383 len = MIN(len, sa_len);
3384 error = copyout((caddr_t)sa, uap->asa, len);
3385 if (error) {
3386 goto bad;
3387 }
3388 /* return the actual, untruncated address length */
3389 len = sa_len;
3390 gotnothing:
3391 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3392 bad:
3393 free_sockaddr(sa);
3394 out:
3395 file_drop(uap->fdes);
3396 return error;
3397 }
3398
3399 /*
3400 * Get name of peer for connected socket.
3401 *
3402 * Returns: 0 Success
3403 * EBADF
3404 * EINVAL
3405 * ENOTCONN
3406 * file_socket:ENOTSOCK
3407 * file_socket:EBADF
3408 * copyin:EFAULT
3409 * copyout:EFAULT
3410 * <pru_peeraddr>:???
3411 * <sf_getpeername>:???
3412 */
3413 /* ARGSUSED */
3414 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)3415 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
3416 __unused int32_ref_t retval)
3417 {
3418 socket_ref_t so;
3419 sockaddr_ref_t sa;
3420 socklen_t len;
3421 socklen_t sa_len;
3422 int error;
3423
3424 error = file_socket(uap->fdes, &so);
3425 if (error) {
3426 return error;
3427 }
3428 if (so == NULL) {
3429 error = EBADF;
3430 goto out;
3431 }
3432
3433 socket_lock(so, 1);
3434
3435 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
3436 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
3437 /* the socket has been shutdown, no more getpeername's */
3438 socket_unlock(so, 1);
3439 error = EINVAL;
3440 goto out;
3441 }
3442
3443 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
3444 socket_unlock(so, 1);
3445 error = ENOTCONN;
3446 goto out;
3447 }
3448 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3449 if (error) {
3450 socket_unlock(so, 1);
3451 goto out;
3452 }
3453 sa = 0;
3454 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
3455 if (error == 0) {
3456 error = sflt_getpeername(so, &sa);
3457 if (error == EJUSTRETURN) {
3458 error = 0;
3459 }
3460 }
3461 socket_unlock(so, 1);
3462 if (error) {
3463 goto bad;
3464 }
3465 if (sa == 0) {
3466 len = 0;
3467 goto gotnothing;
3468 }
3469 sa_len = sa->sa_len;
3470 len = MIN(len, sa_len);
3471 error = copyout(sa, uap->asa, len);
3472 if (error) {
3473 goto bad;
3474 }
3475 /* return the actual, untruncated address length */
3476 len = sa_len;
3477 gotnothing:
3478 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3479 bad:
3480 free_sockaddr(sa);
3481 out:
3482 file_drop(uap->fdes);
3483 return error;
3484 }
3485
3486 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)3487 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
3488 {
3489 sockaddr_ref_t sa;
3490 struct mbuf *m;
3491 int error;
3492 socklen_t alloc_buflen = buflen;
3493
3494 if (buflen > INT_MAX / 2) {
3495 return EINVAL;
3496 }
3497 if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
3498 buflen < offsetof(struct sockaddr, sa_data[0]))) {
3499 return EINVAL;
3500 }
3501 if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
3502 return EINVAL;
3503 }
3504
3505 #ifdef __LP64__
3506 /*
3507 * The fd's in the buffer must expand to be pointers, thus we need twice
3508 * as much space
3509 */
3510 if (type == MT_CONTROL) {
3511 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3512 sizeof(struct cmsghdr);
3513 }
3514 #endif
3515 if (alloc_buflen > MLEN) {
3516 if (type == MT_SONAME && alloc_buflen <= 112) {
3517 alloc_buflen = MLEN; /* unix domain compat. hack */
3518 } else if (alloc_buflen > MCLBYTES) {
3519 return EINVAL;
3520 }
3521 }
3522 m = m_get(M_WAIT, type);
3523 if (m == NULL) {
3524 return ENOBUFS;
3525 }
3526 if (alloc_buflen > MLEN) {
3527 MCLGET(m, M_WAIT);
3528 if ((m->m_flags & M_EXT) == 0) {
3529 m_free(m);
3530 return ENOBUFS;
3531 }
3532 }
3533 /*
3534 * K64: We still copyin the original buflen because it gets expanded
3535 * later and we lie about the size of the mbuf because it only affects
3536 * unp_* functions
3537 */
3538 m->m_len = buflen;
3539 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
3540 if (error) {
3541 (void) m_free(m);
3542 } else {
3543 *mp = m;
3544 if (type == MT_SONAME) {
3545 VERIFY(buflen <= SOCK_MAXADDRLEN);
3546 sa = mtod(m, sockaddr_ref_t);
3547 sa->sa_len = (__uint8_t)buflen;
3548 }
3549 }
3550 return error;
3551 }
3552
3553 /*
3554 * Given a user_addr_t of length len, allocate and fill out a *sa.
3555 *
3556 * Returns: 0 Success
3557 * ENAMETOOLONG Filename too long
3558 * EINVAL Invalid argument
3559 * ENOMEM Not enough space
3560 * copyin:EFAULT Bad address
3561 */
3562 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3563 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
3564 size_t len, boolean_t translate_unspec)
3565 {
3566 sockaddr_ref_t sa;
3567 int error;
3568
3569 if (len > SOCK_MAXADDRLEN) {
3570 return ENAMETOOLONG;
3571 }
3572
3573 if (len < offsetof(struct sockaddr, sa_data[0])) {
3574 return EINVAL;
3575 }
3576
3577 sa = (sockaddr_ref_t)alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
3578
3579 error = copyin(uaddr, (caddr_t)sa, len);
3580 if (error) {
3581 free_sockaddr(sa);
3582 } else {
3583 /*
3584 * Force sa_family to AF_INET on AF_INET sockets to handle
3585 * legacy applications that use AF_UNSPEC (0). On all other
3586 * sockets we leave it unchanged and let the lower layer
3587 * handle it.
3588 */
3589 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
3590 SOCK_CHECK_DOM(so, PF_INET) &&
3591 len == sizeof(struct sockaddr_in)) {
3592 sa->sa_family = AF_INET;
3593 }
3594 VERIFY(len <= SOCK_MAXADDRLEN);
3595 sa = *&sa;
3596 sa->sa_len = (__uint8_t)len;
3597 *namp = sa;
3598 }
3599 return error;
3600 }
3601
3602 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3603 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
3604 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
3605 {
3606 int error;
3607
3608 if (ss == NULL || uaddr == USER_ADDR_NULL ||
3609 len < offsetof(struct sockaddr, sa_data[0])) {
3610 return EINVAL;
3611 }
3612
3613 /*
3614 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
3615 * so the check here is inclusive.
3616 */
3617 if (len > sizeof(*ss)) {
3618 return ENAMETOOLONG;
3619 }
3620
3621 bzero(ss, sizeof(*ss));
3622 error = copyin(uaddr, (caddr_t)ss, len);
3623 if (error == 0) {
3624 /*
3625 * Force sa_family to AF_INET on AF_INET sockets to handle
3626 * legacy applications that use AF_UNSPEC (0). On all other
3627 * sockets we leave it unchanged and let the lower layer
3628 * handle it.
3629 */
3630 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
3631 SOCK_CHECK_DOM(so, PF_INET) &&
3632 len == sizeof(struct sockaddr_in)) {
3633 ss->ss_family = AF_INET;
3634 }
3635
3636 ss->ss_len = (__uint8_t)len;
3637 }
3638 return error;
3639 }
3640
3641 #if DEBUG || DEVELOPMENT
3642 int
internalize_user_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,uio_ref_ptr_t uiop)3643 internalize_user_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3644 u_int count, user_msghdr_x_ptr_t dst, uio_ref_ptr_t uiop)
3645 {
3646 int error = 0;
3647 u_int i;
3648 u_int namecnt = 0;
3649 u_int ctlcnt = 0;
3650
3651 for (i = 0; i < count; i++) {
3652 uio_t auio;
3653 struct user_iovec *iovp;
3654 struct user_msghdr_x *user_msg = dst + i;
3655
3656 if (spacetype == UIO_USERSPACE64) {
3657 const struct user64_msghdr_x *msghdr64;
3658
3659 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3660
3661 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3662 user_msg->msg_namelen = msghdr64->msg_namelen;
3663 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3664 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3665 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3666 user_msg->msg_controllen = msghdr64->msg_controllen;
3667 user_msg->msg_flags = msghdr64->msg_flags;
3668 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3669 } else {
3670 const struct user32_msghdr_x *msghdr32;
3671
3672 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3673
3674 user_msg->msg_name = msghdr32->msg_name;
3675 user_msg->msg_namelen = msghdr32->msg_namelen;
3676 user_msg->msg_iov = msghdr32->msg_iov;
3677 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3678 user_msg->msg_control = msghdr32->msg_control;
3679 user_msg->msg_controllen = msghdr32->msg_controllen;
3680 user_msg->msg_flags = msghdr32->msg_flags;
3681 user_msg->msg_datalen = msghdr32->msg_datalen;
3682 }
3683
3684 if (user_msg->msg_iovlen <= 0 ||
3685 user_msg->msg_iovlen > UIO_MAXIOV) {
3686 error = EMSGSIZE;
3687 goto done;
3688 }
3689 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
3690 direction);
3691 if (auio == NULL) {
3692 error = ENOMEM;
3693 goto done;
3694 }
3695 uiop[i] = auio;
3696
3697 iovp = uio_iovsaddr(auio);
3698 if (iovp == NULL) {
3699 error = ENOMEM;
3700 goto done;
3701 }
3702 error = copyin_user_iovec_array(user_msg->msg_iov,
3703 spacetype, user_msg->msg_iovlen, iovp);
3704 if (error) {
3705 goto done;
3706 }
3707 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3708
3709 error = uio_calculateresid(auio);
3710 if (error) {
3711 goto done;
3712 }
3713 user_msg->msg_datalen = uio_resid(auio);
3714
3715 if (user_msg->msg_name && user_msg->msg_namelen) {
3716 namecnt++;
3717 }
3718 if (user_msg->msg_control && user_msg->msg_controllen) {
3719 ctlcnt++;
3720 }
3721 }
3722 done:
3723
3724 return error;
3725 }
3726 #endif /* DEBUG || DEVELOPMENT */
3727
3728 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3729 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3730 u_int count, user_msghdr_x_ptr_t dst,
3731 recv_msg_elem_ptr_t recv_msg_array)
3732 {
3733 int error = 0;
3734 u_int i;
3735
3736 for (i = 0; i < count; i++) {
3737 struct user_iovec *iovp;
3738 struct user_msghdr_x *user_msg = dst + i;
3739 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3740
3741 if (spacetype == UIO_USERSPACE64) {
3742 const struct user64_msghdr_x *msghdr64;
3743
3744 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3745
3746 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3747 user_msg->msg_namelen = msghdr64->msg_namelen;
3748 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3749 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3750 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3751 user_msg->msg_controllen = msghdr64->msg_controllen;
3752 user_msg->msg_flags = msghdr64->msg_flags;
3753 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3754 } else {
3755 const struct user32_msghdr_x *msghdr32;
3756
3757 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3758
3759 user_msg->msg_name = msghdr32->msg_name;
3760 user_msg->msg_namelen = msghdr32->msg_namelen;
3761 user_msg->msg_iov = msghdr32->msg_iov;
3762 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3763 user_msg->msg_control = msghdr32->msg_control;
3764 user_msg->msg_controllen = msghdr32->msg_controllen;
3765 user_msg->msg_flags = msghdr32->msg_flags;
3766 user_msg->msg_datalen = msghdr32->msg_datalen;
3767 }
3768
3769 if (user_msg->msg_iovlen <= 0 ||
3770 user_msg->msg_iovlen > UIO_MAXIOV) {
3771 error = EMSGSIZE;
3772 goto done;
3773 }
3774 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3775 spacetype, direction);
3776 if (recv_msg_elem->uio == NULL) {
3777 error = ENOMEM;
3778 goto done;
3779 }
3780
3781 iovp = uio_iovsaddr(recv_msg_elem->uio);
3782 if (iovp == NULL) {
3783 error = ENOMEM;
3784 goto done;
3785 }
3786 error = copyin_user_iovec_array(user_msg->msg_iov,
3787 spacetype, user_msg->msg_iovlen, iovp);
3788 if (error) {
3789 goto done;
3790 }
3791 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3792
3793 error = uio_calculateresid(recv_msg_elem->uio);
3794 if (error) {
3795 goto done;
3796 }
3797 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3798
3799 if (user_msg->msg_name && user_msg->msg_namelen) {
3800 recv_msg_elem->which |= SOCK_MSG_SA;
3801 }
3802 if (user_msg->msg_control && user_msg->msg_controllen) {
3803 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3804 }
3805 }
3806 done:
3807
3808 return error;
3809 }
3810
3811 #if DEBUG || DEVELOPMENT
3812 void
externalize_user_msghdr_array(void_ptr_t dst,int spacetype,int direction,u_int count,const user_msghdr_x_ptr_t src,uio_ref_ptr_t uiop)3813 externalize_user_msghdr_array(void_ptr_t dst, int spacetype, int direction,
3814 u_int count, const user_msghdr_x_ptr_t src, uio_ref_ptr_t uiop)
3815 {
3816 #pragma unused(direction)
3817 u_int i;
3818
3819 for (i = 0; i < count; i++) {
3820 const struct user_msghdr_x *user_msg = src + i;
3821 uio_t auio = uiop[i];
3822 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3823
3824 if (spacetype == UIO_USERSPACE64) {
3825 struct user64_msghdr_x *msghdr64;
3826
3827 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3828
3829 msghdr64->msg_flags = user_msg->msg_flags;
3830 msghdr64->msg_datalen = len;
3831 } else {
3832 struct user32_msghdr_x *msghdr32;
3833
3834 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3835
3836 msghdr32->msg_flags = user_msg->msg_flags;
3837 msghdr32->msg_datalen = (user32_size_t)len;
3838 }
3839 }
3840 }
3841 #endif /* DEBUG || DEVELOPMENT */
3842
3843 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3844 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3845 u_int count, user_msghdr_x_ptr_t src,
3846 recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3847 {
3848 u_int i;
3849 u_int retcnt = 0;
3850 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3851
3852 *ret_error = 0;
3853
3854 for (i = 0; i < count; i++) {
3855 struct user_msghdr_x *user_msg = src + i;
3856 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3857 user_ssize_t len = 0;
3858 int error;
3859
3860 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3861
3862 if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3863 retcnt++;
3864
3865 if (recv_msg_elem->which & SOCK_MSG_SA) {
3866 error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3867 &user_msg->msg_namelen);
3868 if (error != 0) {
3869 *ret_error = error;
3870 return 0;
3871 }
3872 }
3873 if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3874 error = copyout_control(p, recv_msg_elem->controlp,
3875 user_msg->msg_control, &user_msg->msg_controllen,
3876 &recv_msg_elem->flags, so);
3877 if (error != 0) {
3878 *ret_error = error;
3879 return 0;
3880 }
3881 }
3882 }
3883
3884 if (spacetype == UIO_USERSPACE64) {
3885 struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3886
3887 msghdr64->msg_namelen = user_msg->msg_namelen;
3888 msghdr64->msg_controllen = user_msg->msg_controllen;
3889 msghdr64->msg_flags = recv_msg_elem->flags;
3890 msghdr64->msg_datalen = len;
3891 } else {
3892 struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3893
3894 msghdr32->msg_namelen = user_msg->msg_namelen;
3895 msghdr32->msg_controllen = user_msg->msg_controllen;
3896 msghdr32->msg_flags = recv_msg_elem->flags;
3897 msghdr32->msg_datalen = (user32_size_t)len;
3898 }
3899 }
3900 return retcnt;
3901 }
3902
3903 #if DEBUG || DEVELOPMENT
3904 void
free_uio_array(uio_ref_ptr_t uiop,u_int count)3905 free_uio_array(uio_ref_ptr_t uiop, u_int count)
3906 {
3907 u_int i;
3908
3909 for (i = 0; i < count; i++) {
3910 if (uiop[i] != NULL) {
3911 uio_free(uiop[i]);
3912 }
3913 }
3914 }
3915 #endif /* DEBUG || DEVELOPMENT */
3916
3917 /* Extern linkage requires using __counted_by instead of bptr */
3918 __private_extern__ user_ssize_t
uio_array_resid(uio_ref_t * __counted_by (count)uiop,u_int count)3919 uio_array_resid(uio_ref_t * __counted_by(count)uiop, u_int count)
3920 {
3921 user_ssize_t len = 0;
3922 u_int i;
3923
3924 for (i = 0; i < count; i++) {
3925 struct uio *auio = uiop[i];
3926
3927 if (auio != NULL) {
3928 len += uio_resid(auio);
3929 }
3930 }
3931 return len;
3932 }
3933
3934 #if DEBUG || DEVELOPMENT
3935 static boolean_t
uio_array_is_valid(uio_ref_ptr_t uiop,u_int count)3936 uio_array_is_valid(uio_ref_ptr_t uiop, u_int count)
3937 {
3938 user_ssize_t len = 0;
3939 u_int i;
3940
3941 for (i = 0; i < count; i++) {
3942 struct uio *auio = uiop[i];
3943
3944 if (auio != NULL) {
3945 user_ssize_t resid = uio_resid(auio);
3946
3947 /*
3948 * Sanity check on the validity of the iovec:
3949 * no point of going over sb_max
3950 */
3951 if (resid < 0 || resid > (user_ssize_t)sb_max) {
3952 return false;
3953 }
3954
3955 len += resid;
3956 if (len < 0 || len > (user_ssize_t)sb_max) {
3957 return false;
3958 }
3959 }
3960 }
3961 return true;
3962 }
3963 #endif /* DEBUG || DEVELOPMENT */
3964
3965 recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)3966 alloc_recv_msg_array(u_int count)
3967 {
3968 return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3969 }
3970
3971 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3972 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3973 {
3974 if (recv_msg_array == NULL) {
3975 return;
3976 }
3977 for (uint32_t i = 0; i < count; i++) {
3978 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3979
3980 if (recv_msg_elem->uio != NULL) {
3981 uio_free(recv_msg_elem->uio);
3982 }
3983 free_sockaddr(recv_msg_elem->psa);
3984 if (recv_msg_elem->controlp != NULL) {
3985 m_freem(recv_msg_elem->controlp);
3986 }
3987 }
3988 kfree_type(struct recv_msg_elem, count, recv_msg_array);
3989 }
3990
3991
3992 /* Extern linkage requires using __counted_by instead of bptr */
3993 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3994 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3995 {
3996 user_ssize_t len = 0;
3997 u_int i;
3998
3999 for (i = 0; i < count; i++) {
4000 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
4001
4002 if (recv_msg_elem->uio != NULL) {
4003 len += uio_resid(recv_msg_elem->uio);
4004 }
4005 }
4006 return len;
4007 }
4008
4009 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)4010 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
4011 {
4012 user_ssize_t len = 0;
4013 u_int i;
4014
4015 for (i = 0; i < count; i++) {
4016 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
4017
4018 if (recv_msg_elem->uio != NULL) {
4019 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
4020
4021 /*
4022 * Sanity check on the validity of the iovec:
4023 * no point of going over sb_max
4024 */
4025 if (resid < 0 || (u_int32_t)resid > sb_max) {
4026 return 0;
4027 }
4028
4029 len += resid;
4030 if (len < 0 || (u_int32_t)len > sb_max) {
4031 return 0;
4032 }
4033 }
4034 }
4035 return 1;
4036 }
4037
4038 #if SENDFILE
4039
/* Number of iovec slots used for each sendfile read (see UIO_STACKBUF use) */
#define SFUIOBUFS 64

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Each computes ((n - 1) >> shift) + 1 in unsigned int arithmetic, i.e. a
 * round-up division assuming the shift is log2 of the cluster size.
 * (n) must be non-zero: (unsigned int)0 - 1 wraps around.
 */
#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
4052
4053 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)4054 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
4055 mbuf_ref_ref_t m, boolean_t jumbocl)
4056 {
4057 unsigned int needed;
4058
4059 if (pktlen == 0) {
4060 panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
4061 }
4062
4063 /*
4064 * Try to allocate for the whole thing. Since we want full control
4065 * over the buffer size and be able to accept partial result, we can't
4066 * use mbuf_allocpacket(). The logic below is similar to sosend().
4067 */
4068 *m = NULL;
4069 if (pktlen > MBIGCLBYTES && jumbocl) {
4070 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
4071 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
4072 }
4073 if (*m == NULL) {
4074 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
4075 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
4076 }
4077
4078 /*
4079 * Our previous attempt(s) at allocation had failed; the system
4080 * may be short on mbufs, and we want to block until they are
4081 * available. This time, ask just for 1 mbuf and don't return
4082 * until we get it.
4083 */
4084 if (*m == NULL) {
4085 needed = 1;
4086 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
4087 }
4088 if (*m == NULL) {
4089 panic("%s: blocking allocation returned NULL", __func__);
4090 }
4091
4092 *maxchunks = needed;
4093 }
4094
4095 /*
4096 * sendfile(2).
4097 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
4098 * struct sf_hdtr *hdtr, int flags)
4099 *
4100 * Send a file specified by 'fd' and starting at 'offset' to a socket
4101 * specified by 's'. Send only '*nbytes' of the file or until EOF if
4102 * *nbytes == 0. Optionally add a header and/or trailer to the socket
4103 * output. If specified, write the total number of bytes sent into *nbytes.
4104 */
4105 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)4106 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
4107 {
4108 fileproc_ref_t fp;
4109 vnode_ref_t vp;
4110 socket_ref_t so;
4111 struct writev_nocancel_args nuap;
4112 user_ssize_t writev_retval;
4113 struct user_sf_hdtr user_hdtr;
4114 struct user32_sf_hdtr user32_hdtr;
4115 struct user64_sf_hdtr user64_hdtr;
4116 off_t off, xfsize;
4117 off_t nbytes = 0, sbytes = 0;
4118 int error = 0;
4119 size_t sizeof_hdtr;
4120 off_t file_size;
4121 struct vfs_context context = *vfs_context_current();
4122
4123 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
4124
4125 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
4126 0, 0, 0, 0);
4127
4128 AUDIT_ARG(fd, uap->fd);
4129 AUDIT_ARG(value32, uap->s);
4130
4131 /*
4132 * Do argument checking. Must be a regular file in, stream
4133 * type and connected socket out, positive offset.
4134 */
4135 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
4136 goto done;
4137 }
4138 if ((fp->f_flag & FREAD) == 0) {
4139 error = EBADF;
4140 goto done1;
4141 }
4142 if (vnode_isreg(vp) == 0) {
4143 error = ENOTSUP;
4144 goto done1;
4145 }
4146 error = file_socket(uap->s, &so);
4147 if (error) {
4148 goto done1;
4149 }
4150 if (so == NULL) {
4151 error = EBADF;
4152 goto done2;
4153 }
4154 if (so->so_type != SOCK_STREAM) {
4155 error = EINVAL;
4156 goto done2;
4157 }
4158 if ((so->so_state & SS_ISCONNECTED) == 0) {
4159 error = ENOTCONN;
4160 goto done2;
4161 }
4162 if (uap->offset < 0) {
4163 error = EINVAL;
4164 goto done2;
4165 }
4166 if (uap->nbytes == USER_ADDR_NULL) {
4167 error = EINVAL;
4168 goto done2;
4169 }
4170 if (uap->flags != 0) {
4171 error = EINVAL;
4172 goto done2;
4173 }
4174
4175 context.vc_ucred = fp->fp_glob->fg_cred;
4176
4177 #if CONFIG_MACF_SOCKET_SUBSET
4178 /* JMM - fetch connected sockaddr? */
4179 error = mac_socket_check_send(context.vc_ucred, so, NULL);
4180 if (error) {
4181 goto done2;
4182 }
4183 #endif
4184
4185 /*
4186 * Get number of bytes to send
4187 * Should it applies to size of header and trailer?
4188 */
4189 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
4190 if (error) {
4191 goto done2;
4192 }
4193
4194 /*
4195 * If specified, get the pointer to the sf_hdtr struct for
4196 * any headers/trailers.
4197 */
4198 if (uap->hdtr != USER_ADDR_NULL) {
4199 caddr_t hdtrp;
4200
4201 bzero(&user_hdtr, sizeof(user_hdtr));
4202 if (is_p_64bit_process) {
4203 hdtrp = (caddr_t)&user64_hdtr;
4204 sizeof_hdtr = sizeof(user64_hdtr);
4205 } else {
4206 hdtrp = (caddr_t)&user32_hdtr;
4207 sizeof_hdtr = sizeof(user32_hdtr);
4208 }
4209 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
4210 if (error) {
4211 goto done2;
4212 }
4213 if (is_p_64bit_process) {
4214 user_hdtr.headers = user64_hdtr.headers;
4215 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
4216 user_hdtr.trailers = user64_hdtr.trailers;
4217 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
4218 } else {
4219 user_hdtr.headers = user32_hdtr.headers;
4220 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
4221 user_hdtr.trailers = user32_hdtr.trailers;
4222 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
4223 }
4224
4225 /*
4226 * Send any headers. Wimp out and use writev(2).
4227 */
4228 if (user_hdtr.headers != USER_ADDR_NULL) {
4229 bzero(&nuap, sizeof(struct writev_args));
4230 nuap.fd = uap->s;
4231 nuap.iovp = user_hdtr.headers;
4232 nuap.iovcnt = user_hdtr.hdr_cnt;
4233 error = writev_nocancel(p, &nuap, &writev_retval);
4234 if (error) {
4235 goto done2;
4236 }
4237 sbytes += writev_retval;
4238 }
4239 }
4240
4241 /*
4242 * Get the file size for 2 reasons:
4243 * 1. We don't want to allocate more mbufs than necessary
4244 * 2. We don't want to read past the end of file
4245 */
4246 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
4247 goto done2;
4248 }
4249
4250 /*
4251 * Simply read file data into a chain of mbufs that used with scatter
4252 * gather reads. We're not (yet?) setup to use zero copy external
4253 * mbufs that point to the file pages.
4254 */
4255 socket_lock(so, 1);
4256 error = sblock(&so->so_snd, SBL_WAIT);
4257 if (error) {
4258 socket_unlock(so, 1);
4259 goto done2;
4260 }
4261 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
4262 mbuf_ref_t m0 = NULL;
4263 mbuf_t m;
4264 unsigned int nbufs = SFUIOBUFS, i;
4265 uio_t auio;
4266 UIO_STACKBUF(uio_buf, SFUIOBUFS); /* 1KB !!! */
4267 size_t uiolen;
4268 user_ssize_t rlen;
4269 off_t pgoff;
4270 size_t pktlen;
4271 boolean_t jumbocl;
4272
4273 /*
4274 * Calculate the amount to transfer.
4275 * Align to round number of pages.
4276 * Not to exceed send socket buffer,
4277 * the EOF, or the passed in nbytes.
4278 */
4279 xfsize = sbspace(&so->so_snd);
4280
4281 if (xfsize <= 0) {
4282 if (so->so_state & SS_CANTSENDMORE) {
4283 error = EPIPE;
4284 goto done3;
4285 } else if ((so->so_state & SS_NBIO)) {
4286 error = EAGAIN;
4287 goto done3;
4288 } else {
4289 xfsize = PAGE_SIZE;
4290 }
4291 }
4292
4293 if (xfsize > SENDFILE_MAX_BYTES) {
4294 xfsize = SENDFILE_MAX_BYTES;
4295 } else if (xfsize > PAGE_SIZE) {
4296 xfsize = trunc_page(xfsize);
4297 }
4298 pgoff = off & PAGE_MASK_64;
4299 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
4300 xfsize = PAGE_SIZE_64 - pgoff;
4301 }
4302 if (nbytes && xfsize > (nbytes - sbytes)) {
4303 xfsize = nbytes - sbytes;
4304 }
4305 if (xfsize <= 0) {
4306 break;
4307 }
4308 if (off + xfsize > file_size) {
4309 xfsize = file_size - off;
4310 }
4311 if (xfsize <= 0) {
4312 break;
4313 }
4314
4315 /*
4316 * Attempt to use larger than system page-size clusters for
4317 * large writes only if there is a jumbo cluster pool and
4318 * if the socket is marked accordingly.
4319 */
4320 jumbocl = sosendjcl && njcl > 0 &&
4321 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
4322
4323 socket_unlock(so, 0);
4324 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
4325 pktlen = mbuf_pkthdr_maxlen(m0);
4326 if (pktlen < (size_t)xfsize) {
4327 xfsize = pktlen;
4328 }
4329
4330 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
4331 UIO_READ, &uio_buf[0], sizeof(uio_buf));
4332 if (auio == NULL) {
4333 DBG_PRINTF("sendfile failed. nbufs = %d. %s", nbufs,
4334 "File a radar related to rdar://10146739.\n");
4335 mbuf_freem(m0);
4336 error = ENXIO;
4337 socket_lock(so, 0);
4338 goto done3;
4339 }
4340
4341 for (i = 0, m = m0, uiolen = 0;
4342 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
4343 i++, m = mbuf_next(m)) {
4344 size_t mlen = mbuf_maxlen(m);
4345
4346 if (mlen + uiolen > (size_t)xfsize) {
4347 mlen = xfsize - uiolen;
4348 }
4349 mbuf_setlen(m, mlen);
4350 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
4351 mlen);
4352 uiolen += mlen;
4353 }
4354
4355 if (xfsize != uio_resid(auio)) {
4356 DBG_PRINTF("sendfile: xfsize: %lld != uio_resid(auio): "
4357 "%lld\n", xfsize, (long long)uio_resid(auio));
4358 }
4359
4360 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
4361 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
4362 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
4363 error = fo_read(fp, auio, FOF_OFFSET, &context);
4364 socket_lock(so, 0);
4365 if (error != 0) {
4366 if (uio_resid(auio) != xfsize && (error == ERESTART ||
4367 error == EINTR || error == EWOULDBLOCK)) {
4368 error = 0;
4369 } else {
4370 mbuf_freem(m0);
4371 goto done3;
4372 }
4373 }
4374 xfsize -= uio_resid(auio);
4375 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
4376 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
4377 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
4378
4379 if (xfsize == 0) {
4380 break;
4381 }
4382 if (xfsize + off > file_size) {
4383 DBG_PRINTF("sendfile: xfsize: %lld + off: %lld > file_size:"
4384 "%lld\n", xfsize, off, file_size);
4385 }
4386 for (i = 0, m = m0, rlen = 0;
4387 i < nbufs && m != NULL && rlen < xfsize;
4388 i++, m = mbuf_next(m)) {
4389 size_t mlen = mbuf_maxlen(m);
4390
4391 if (rlen + mlen > (size_t)xfsize) {
4392 mlen = xfsize - rlen;
4393 }
4394 mbuf_setlen(m, mlen);
4395
4396 rlen += mlen;
4397 }
4398 mbuf_pkthdr_setlen(m0, xfsize);
4399
4400 retry_space:
4401 /*
4402 * Make sure that the socket is still able to take more data.
4403 * CANTSENDMORE being true usually means that the connection
4404 * was closed. so_error is true when an error was sensed after
4405 * a previous send.
4406 * The state is checked after the page mapping and buffer
4407 * allocation above since those operations may block and make
4408 * any socket checks stale. From this point forward, nothing
4409 * blocks before the pru_send (or more accurately, any blocking
4410 * results in a loop back to here to re-check).
4411 */
4412 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
4413 if (so->so_state & SS_CANTSENDMORE) {
4414 error = EPIPE;
4415 } else {
4416 error = so->so_error;
4417 so->so_error = 0;
4418 }
4419 m_freem(m0);
4420 goto done3;
4421 }
4422 /*
4423 * Wait for socket space to become available. We do this just
4424 * after checking the connection state above in order to avoid
4425 * a race condition with sbwait().
4426 */
4427 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
4428 if (so->so_state & SS_NBIO) {
4429 m_freem(m0);
4430 error = EAGAIN;
4431 goto done3;
4432 }
4433 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4434 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
4435 error = sbwait(&so->so_snd);
4436 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4437 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
4438 /*
4439 * An error from sbwait usually indicates that we've
4440 * been interrupted by a signal. If we've sent anything
4441 * then return bytes sent, otherwise return the error.
4442 */
4443 if (error) {
4444 m_freem(m0);
4445 goto done3;
4446 }
4447 goto retry_space;
4448 }
4449
4450 mbuf_ref_t control = NULL;
4451 {
4452 /*
4453 * Socket filter processing
4454 */
4455
4456 error = sflt_data_out(so, NULL, &m0, &control, 0);
4457 if (error) {
4458 if (error == EJUSTRETURN) {
4459 error = 0;
4460 continue;
4461 }
4462 goto done3;
4463 }
4464 /*
4465 * End Socket filter processing
4466 */
4467 }
4468 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4469 uap->s, 0, 0, 0, 0);
4470 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
4471 NULL, control, p);
4472 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4473 uap->s, 0, 0, 0, 0);
4474 if (error) {
4475 goto done3;
4476 }
4477 }
4478 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
4479 /*
4480 * Send trailers. Wimp out and use writev(2).
4481 */
4482 if (uap->hdtr != USER_ADDR_NULL &&
4483 user_hdtr.trailers != USER_ADDR_NULL) {
4484 bzero(&nuap, sizeof(struct writev_args));
4485 nuap.fd = uap->s;
4486 nuap.iovp = user_hdtr.trailers;
4487 nuap.iovcnt = user_hdtr.trl_cnt;
4488 error = writev_nocancel(p, &nuap, &writev_retval);
4489 if (error) {
4490 goto done2;
4491 }
4492 sbytes += writev_retval;
4493 }
4494 done2:
4495 file_drop(uap->s);
4496 done1:
4497 file_drop(uap->fd);
4498 done:
4499 if (uap->nbytes != USER_ADDR_NULL) {
4500 /* XXX this appears bogus for some early failure conditions */
4501 copyout(&sbytes, uap->nbytes, sizeof(off_t));
4502 }
4503 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
4504 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
4505 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
4506 return error;
4507 done3:
4508 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
4509 goto done2;
4510 }
4511
4512
4513 #endif /* SENDFILE */
4514