1 /*
2 * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 #include <sys/types.h>
96
97 #include <security/audit/audit.h>
98
99 #include <sys/kdebug.h>
100 #include <sys/sysproto.h>
101 #include <netinet/in.h>
102 #include <net/route.h>
103 #include <netinet/in_pcb.h>
104
105 #include <os/log.h>
106 #include <os/ptrtools.h>
107
108 #include <os/log.h>
109
110 #if CONFIG_MACF_SOCKET_SUBSET
111 #include <security/mac_framework.h>
112 #endif /* MAC_SOCKET_SUBSET */
113
114 #include <net/sockaddr_utils.h>
115
/*
 * Defined outside this file. In this file its result is only ever passed
 * as the "%s" argument of the accept() DBG_PRINTF lines, so it presumably
 * yields the process name as a C string -- TODO confirm against its
 * definition.
 */
116 extern char *proc_name_address(void *p);
117
/*
 * Shorthand accessors for struct fileproc: with these in effect,
 * `fp->f_flag` expands to `fp->fp_glob->fg_flag` and `fp->f_ops` to
 * `fp->fp_glob->fg_ops`.
 */
118 #define f_flag fp_glob->fg_flag
119 #define f_ops fp_glob->fg_ops
120
/*
 * Trace codes, all built with NETDBG_CODE() in the DBG_NETSOCK class.
 * The DBG_LAYER_* codes use the small values 0..3; each DBG_FNC_* code
 * places a per-function number in bits 8 and up, optionally OR-ed with a
 * small sub-code in the low bits.
 */
121 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
122 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
123 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
124 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
125 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
126 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
127 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
128 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
129 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
130 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
/* sendfile: function number 10, with sub-codes 1..3 for wait/read/send */
131 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
132 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
133 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
134 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
135 #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
136 #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
137
/*
 * Pointer-type declarations used by the prototypes and definitions in
 * this file. Each line names a base type, a short tag, and a pointer
 * kind (__CCT_PTR or __CCT_REF); judging by the names used further down
 * (void_ptr_t, int32_ref_t, socklen_ref_t, ...) the macro presumably
 * declares a `<tag>_ptr_t` / `<tag>_ref_t` typedef -- TODO confirm
 * against the macro's definition.
 */
138 /* Forward declarations for referenced types */
139 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
140 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
141 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
142 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
143 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
144 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
145 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
146 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
/* syscall argument structures */
147 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
148 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
149 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
150 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
/* timeval in its native, 64-bit-user and 32-bit-user layouts */
151 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
152 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
153 __CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
154
/*
 * Prototypes for the file-local helpers. Parameter names are mostly
 * omitted here; see each definition for the meaning of the arguments.
 */

/* common back ends for the send and receive system calls */
155 static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
156 int, int32_ref_t );
157 static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
158 int32_ref_t);
/* connect() back end: MAC check, soconnectlock(), wait for completion */
159 static int connectit(socket_ref_t, sockaddr_ref_t);
/*
 * Copy a socket address in from user space. getsockaddr() hands back a
 * sockaddr that callers release with free_sockaddr(); getsockaddr_s()
 * fills a caller-supplied sockaddr_storage instead.
 */
160 static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
161 size_t, boolean_t);
162 static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
163 user_addr_t, size_t, boolean_t);
164 #if SENDFILE
165 static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
166 boolean_t);
167 #endif /* SENDFILE */
/* connectx()/disconnectx() implementations without the cancellation point */
168 static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
169 static int connectitx(socket_ref_t, sockaddr_ref_t,
170 sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
171 sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
172 static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
173 int_ref_t);
/* shared body of socket() and socket_delegate() */
174 static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);
175
/* conversion of message-header arrays between user and kernel form */
176 static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
177 u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
178 static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
179 u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);
180
/* lifetime and validation of the recv_msg_elem array */
181 static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
182 static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
183 static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
184 static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
185 socklen_ref_t, int_ref_t, socket_ref_t);
186
/*
 * Tunables and counters registered under the _kern_ipc sysctl node.
 * Each SYSCTL_* line binds the named OID to the static variable declared
 * just above it; the description string is left empty.
 */
187 SYSCTL_DECL(_kern_ipc);
188
/* initial value of both message-count limits below */
189 #define SO_MAX_MSG_X_DEFAULT 256
190
/* read-write limit named after sendmsg_x; starts at SO_MAX_MSG_X_DEFAULT */
191 static u_int somaxsendmsgx = SO_MAX_MSG_X_DEFAULT;
192 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
193 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
194
/* read-write limit named after recvmsg_x; starts at SO_MAX_MSG_X_DEFAULT */
195 static u_int somaxrecvmsgx = SO_MAX_MSG_X_DEFAULT;
196 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
197 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
198
/* read-only (CTLFLAG_RD) counter; it is updated elsewhere in this file */
199 static u_int missingpktinfo = 0;
200 SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
201 CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");
202
/* read-write switch, off (0) by default; consumed elsewhere in this file */
203 static int do_recvmsg_x_donttrunc = 0;
204 SYSCTL_INT(_kern_ipc, OID_AUTO, do_recvmsg_x_donttrunc,
205 CTLFLAG_RW | CTLFLAG_LOCKED, &do_recvmsg_x_donttrunc, 0, "");
206
/*
 * Debug logging support.
 *
 * On DEBUG/DEVELOPMENT kernels, DBG_PRINTF() logs through os_log() when
 * the uipc_debug sysctl (registered as "debug" under _kern_ipc) is
 * non-zero, and DEBUG_KERNEL_ADDRPERM() passes its argument through
 * unchanged. On other kernels DBG_PRINTF() compiles to nothing and
 * DEBUG_KERNEL_ADDRPERM() maps to VM_KERNEL_ADDRPERM().
 *
 * Both variants of DBG_PRINTF() are wrapped in do { } while (0) so the
 * macro always behaves as a single statement: it requires a trailing
 * semicolon and cannot capture an `else` that follows the call site.
 */
#if DEBUG || DEVELOPMENT
static int uipc_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &uipc_debug, 0, "");

#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) do {                                    \
	if (uipc_debug != 0) {                                  \
	        os_log(OS_LOG_DEFAULT, __VA_ARGS__);            \
	}                                                       \
} while (0)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
220
221
222 /*
223 * Values for sendmsg_x_mode
224 * 0: default
225 * 1: sendit loop one at a time
226 * 2: old implementation
227 */
/* read-write sysctl under _kern_ipc; starts in mode 0 */
228 static u_int sendmsg_x_mode = 0;
229 SYSCTL_UINT(_kern_ipc, OID_AUTO, sendmsg_x_mode,
230 CTLFLAG_RW | CTLFLAG_LOCKED, &sendmsg_x_mode, 0, "");
231
232 /*
233 * System call interface to the socket abstraction.
234 */
235
/*
 * File-operations vector for sockets, defined outside this file. It is
 * installed in fp->f_ops for every socket file created here (see
 * socket_common() and accept_nocancel()).
 */
236 extern const struct fileops socketops;
237
238 /*
239 * Returns: 0 Success
240 * EACCES Mandatory Access Control failure
241 * falloc:ENFILE
242 * falloc:EMFILE
243 * falloc:ENOMEM
244 * socreate:EAFNOSUPPORT
245 * socreate:EPROTOTYPE
246 * socreate:EPROTONOSUPPORT
247 * socreate:ENOBUFS
248 * socreate:ENOMEM
249 * socreate:??? [other protocol families, IPSEC]
250 */
251 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)252 socket(proc_ref_t p,
253 struct socket_args *uap,
254 int32_ref_t retval)
255 {
256 return socket_common(p, uap->domain, uap->type, uap->protocol,
257 proc_selfpid(), retval, 0);
258 }
259
260 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)261 socket_delegate(proc_ref_t p,
262 struct socket_delegate_args *uap,
263 int32_ref_t retval)
264 {
265 return socket_common(p, uap->domain, uap->type, uap->protocol,
266 uap->epid, retval, 1);
267 }
268
269 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)270 socket_common(proc_ref_t p,
271 int domain,
272 int type,
273 int protocol,
274 pid_t epid,
275 int32_ref_t retval,
276 int delegate)
277 {
278 socket_ref_t so;
279 fileproc_ref_t fp;
280 int fd, error;
281
282 AUDIT_ARG(socket, domain, type, protocol);
283 #if CONFIG_MACF_SOCKET_SUBSET
284 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
285 type, protocol)) != 0) {
286 return error;
287 }
288 #endif /* MAC_SOCKET_SUBSET */
289
290 if (delegate) {
291 error = priv_check_cred(kauth_cred_get(),
292 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
293 if (error) {
294 return EACCES;
295 }
296 }
297
298 error = falloc(p, &fp, &fd);
299 if (error) {
300 return error;
301 }
302 fp->f_flag = FREAD | FWRITE;
303 fp->f_ops = &socketops;
304
305 if (delegate) {
306 error = socreate_delegate(domain, &so, type, protocol, epid);
307 } else {
308 error = socreate(domain, &so, type, protocol);
309 }
310
311 if (error) {
312 fp_free(p, fd, fp);
313 } else {
314 fp_set_data(fp, so);
315
316 proc_fdlock(p);
317 procfdtbl_releasefd(p, fd, NULL);
318
319 if (ENTR_SHOULDTRACE) {
320 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
321 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
322 }
323 fp_drop(p, fd, fp, 1);
324 proc_fdunlock(p);
325
326 *retval = fd;
327 }
328 return error;
329 }
330
331 /*
332 * Returns: 0 Success
333 * EDESTADDRREQ Destination address required
334 * EBADF Bad file descriptor
335 * EACCES Mandatory Access Control failure
336 * file_socket:ENOTSOCK
337 * file_socket:EBADF
338 * getsockaddr:ENAMETOOLONG Filename too long
339 * getsockaddr:EINVAL Invalid argument
340 * getsockaddr:ENOMEM Not enough space
341 * getsockaddr:EFAULT Bad address
342 * sobindlock:???
343 */
344 /* ARGSUSED */
345 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)346 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
347 {
348 struct sockaddr_storage ss;
349 sockaddr_ref_t sa = NULL;
350 socket_ref_t so;
351 boolean_t want_free = TRUE;
352 int error;
353
354 AUDIT_ARG(fd, uap->s);
355 error = file_socket(uap->s, &so);
356 if (error != 0) {
357 return error;
358 }
359 if (so == NULL) {
360 error = EBADF;
361 goto out;
362 }
363 if (uap->name == USER_ADDR_NULL) {
364 error = EDESTADDRREQ;
365 goto out;
366 }
367 if (uap->namelen > sizeof(ss)) {
368 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
369 } else {
370 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
371 if (error == 0) {
372 sa = SA(&ss);
373 want_free = FALSE;
374 }
375 }
376 if (error != 0) {
377 goto out;
378 }
379 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
380 #if CONFIG_MACF_SOCKET_SUBSET
381 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
382 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
383 error = sobindlock(so, sa, 1); /* will lock socket */
384 }
385 #else
386 error = sobindlock(so, sa, 1); /* will lock socket */
387 #endif /* MAC_SOCKET_SUBSET */
388 if (want_free) {
389 free_sockaddr(sa);
390 }
391 out:
392 file_drop(uap->s);
393 return error;
394 }
395
396 /*
397 * Returns: 0 Success
398 * EBADF
399 * EACCES Mandatory Access Control failure
400 * file_socket:ENOTSOCK
401 * file_socket:EBADF
402 * solisten:EINVAL
403 * solisten:EOPNOTSUPP
404 * solisten:???
405 */
406 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)407 listen(__unused proc_ref_t p, struct listen_args *uap,
408 __unused int32_ref_t retval)
409 {
410 int error;
411 socket_ref_t so;
412
413 AUDIT_ARG(fd, uap->s);
414 error = file_socket(uap->s, &so);
415 if (error) {
416 return error;
417 }
418 if (so != NULL)
419 #if CONFIG_MACF_SOCKET_SUBSET
420 {
421 error = mac_socket_check_listen(kauth_cred_get(), so);
422 if (error == 0) {
423 error = solisten(so, uap->backlog);
424 }
425 }
426 #else
427 { error = solisten(so, uap->backlog);}
428 #endif /* MAC_SOCKET_SUBSET */
429 else {
430 error = EBADF;
431 }
432
433 file_drop(uap->s);
434 return error;
435 }
436
437 /*
438 * Returns: fp_get_ftype:EBADF Bad file descriptor
439 * fp_get_ftype:ENOTSOCK Socket operation on non-socket
440 * :EFAULT Bad address on copyin/copyout
441 * :EBADF Bad file descriptor
442 * :EOPNOTSUPP Operation not supported on socket
443 * :EINVAL Invalid argument
444 * :EWOULDBLOCK Operation would block
445 * :ECONNABORTED Connection aborted
446 * :EINTR Interrupted function
447 * :EACCES Mandatory Access Control failure
448 * falloc:ENFILE Too many files open in system
449 * falloc:EMFILE Too many open files
450 * falloc:ENOMEM Not enough space
451 * 0 Success
452 */
453 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)454 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
455 int32_ref_t retval)
456 {
457 fileproc_ref_t fp;
458 sockaddr_ref_t sa = NULL;
459 socklen_t namelen;
460 int error;
461 socket_ref_t head;
462 socket_ref_t so = NULL;
463 lck_mtx_t *mutex_held;
464 int fd = uap->s;
465 int newfd;
466 unsigned int fflag;
467 int dosocklock = 0;
468
469 *retval = -1;
470
471 AUDIT_ARG(fd, uap->s);
472
473 if (uap->name) {
474 error = copyin(uap->anamelen, (caddr_t)&namelen,
475 sizeof(socklen_t));
476 if (error) {
477 return error;
478 }
479 }
480 error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
481 if (error) {
482 return error;
483 }
484 head = (struct socket *)fp_get_data(fp);
485
486 #if CONFIG_MACF_SOCKET_SUBSET
487 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
488 goto out;
489 }
490 #endif /* MAC_SOCKET_SUBSET */
491
492 socket_lock(head, 1);
493
494 if (head->so_proto->pr_getlock != NULL) {
495 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
496 dosocklock = 1;
497 } else {
498 mutex_held = head->so_proto->pr_domain->dom_mtx;
499 dosocklock = 0;
500 }
501
502 if ((head->so_options & SO_ACCEPTCONN) == 0) {
503 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
504 error = EOPNOTSUPP;
505 } else {
506 /* POSIX: The socket is not accepting connections */
507 error = EINVAL;
508 }
509 socket_unlock(head, 1);
510 DBG_PRINTF("%s:%d accept() SO_ACCEPTCONN %d: msleep", proc_name_address(p), proc_selfpid(), error);
511 goto out;
512 }
513 check_again:
514 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
515 socket_unlock(head, 1);
516 error = EWOULDBLOCK;
517 goto out;
518 }
519 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
520 if (head->so_state & SS_CANTRCVMORE) {
521 head->so_error = ECONNABORTED;
522 break;
523 }
524 if (head->so_usecount < 1) {
525 panic("accept: head=%p refcount=%d", head,
526 head->so_usecount);
527 }
528 error = msleep((caddr_t)&head->so_timeo, mutex_held,
529 PSOCK | PCATCH, "accept", 0);
530 if (head->so_usecount < 1) {
531 panic("accept: 2 head=%p refcount=%d", head,
532 head->so_usecount);
533 }
534 if ((head->so_state & SS_DRAINING)) {
535 error = ECONNABORTED;
536 }
537 if (error) {
538 DBG_PRINTF("%s:%d accept() error %d: msleep", proc_name_address(p), proc_selfpid(), error);
539 socket_unlock(head, 1);
540 goto out;
541 }
542 }
543 if (head->so_error) {
544 error = head->so_error;
545 head->so_error = 0;
546 socket_unlock(head, 1);
547 DBG_PRINTF("%s:%d accept() error %d: head->so_error", proc_name_address(p), proc_selfpid(), error);
548 goto out;
549 }
550
551 /*
552 * At this point we know that there is at least one connection
553 * ready to be accepted. Remove it from the queue prior to
554 * allocating the file descriptor for it since falloc() may
555 * block allowing another process to accept the connection
556 * instead.
557 */
558 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
559
560 so_acquire_accept_list(head, NULL);
561 if (TAILQ_EMPTY(&head->so_comp)) {
562 so_release_accept_list(head);
563 goto check_again;
564 }
565
566 so = TAILQ_FIRST(&head->so_comp);
567 TAILQ_REMOVE(&head->so_comp, so, so_list);
568 /*
569 * Acquire the lock of the new connection
570 * as we may be in the process of receiving
571 * a packet that may change its so_state
572 * (e.g.: a TCP FIN).
573 */
574 if (dosocklock) {
575 socket_lock(so, 0);
576 }
577 so->so_head = NULL;
578 so->so_state &= ~SS_COMP;
579 if (dosocklock) {
580 socket_unlock(so, 0);
581 }
582 head->so_qlen--;
583 so_release_accept_list(head);
584
585 /* unlock head to avoid deadlock with select, keep a ref on head */
586 socket_unlock(head, 0);
587
588 #if CONFIG_MACF_SOCKET_SUBSET
589 /*
590 * Pass the pre-accepted socket to the MAC framework. This is
591 * cheaper than allocating a file descriptor for the socket,
592 * calling the protocol accept callback, and possibly freeing
593 * the file descriptor should the MAC check fails.
594 */
595 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
596 socket_lock(so, 1);
597 so->so_state &= ~SS_NOFDREF;
598 socket_unlock(so, 1);
599 soclose(so);
600 /* Drop reference on listening socket */
601 sodereference(head);
602 goto out;
603 }
604 #endif /* MAC_SOCKET_SUBSET */
605
606 /*
607 * Pass the pre-accepted socket to any interested socket filter(s).
608 * Upon failure, the socket would have been closed by the callee.
609 */
610 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
611 /* Drop reference on listening socket */
612 sodereference(head);
613 /* Propagate socket filter's error code to the caller */
614 DBG_PRINTF("%s:%d accept() error %d: soacceptfilter", proc_name_address(p), proc_selfpid(), error);
615 goto out;
616 }
617
618 fflag = fp->f_flag;
619 error = falloc(p, &fp, &newfd);
620 if (error) {
621 /*
622 * Probably ran out of file descriptors.
623 *
624 * <rdar://problem/8554930>
625 * Don't put this back on the socket like we used to, that
626 * just causes the client to spin. Drop the socket.
627 */
628 socket_lock(so, 1);
629 so->so_state &= ~SS_NOFDREF;
630 socket_unlock(so, 1);
631 soclose(so);
632 sodereference(head);
633 DBG_PRINTF("%s:%d accept() error %d: falloc", proc_name_address(p), proc_selfpid(), error);
634 goto out;
635 }
636 *retval = newfd;
637 fp->f_flag = fflag;
638 fp->f_ops = &socketops;
639 fp_set_data(fp, so);
640
641 socket_lock(head, 0);
642 if (dosocklock) {
643 socket_lock(so, 1);
644 }
645
646 /* Sync socket non-blocking/async state with file flags */
647 if (fp->f_flag & FNONBLOCK) {
648 so->so_state |= SS_NBIO;
649 } else {
650 so->so_state &= ~SS_NBIO;
651 }
652
653 if (fp->f_flag & FASYNC) {
654 so->so_state |= SS_ASYNC;
655 so->so_rcv.sb_flags |= SB_ASYNC;
656 so->so_snd.sb_flags |= SB_ASYNC;
657 } else {
658 so->so_state &= ~SS_ASYNC;
659 so->so_rcv.sb_flags &= ~SB_ASYNC;
660 so->so_snd.sb_flags &= ~SB_ASYNC;
661 }
662
663 (void) soacceptlock(so, &sa, 0);
664 socket_unlock(head, 1);
665 if (sa == NULL) {
666 namelen = 0;
667 if (uap->name) {
668 goto gotnoname;
669 }
670 error = 0;
671 goto releasefd;
672 }
673 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
674
675 if (uap->name) {
676 socklen_t sa_len;
677
678 /* save sa_len before it is destroyed */
679 sa_len = sa->sa_len;
680 namelen = MIN(namelen, sa_len);
681 error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->name, namelen);
682 if (!error) {
683 /* return the actual, untruncated address length */
684 namelen = sa_len;
685 }
686 gotnoname:
687 error = copyout((caddr_t)&namelen, uap->anamelen,
688 sizeof(socklen_t));
689 if (__improbable(error != 0)) {
690 DBG_PRINTF("%s:%d accept() error %d: falloc", proc_name_address(p), proc_selfpid(), error);
691 }
692 }
693 free_sockaddr(sa);
694
695 releasefd:
696 /*
697 * If the socket has been marked as inactive by sosetdefunct(),
698 * disallow further operations on it.
699 */
700 if (so->so_flags & SOF_DEFUNCT) {
701 sodefunct(current_proc(), so,
702 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
703 }
704
705 if (dosocklock) {
706 socket_unlock(so, 1);
707 }
708
709 proc_fdlock(p);
710 procfdtbl_releasefd(p, newfd, NULL);
711 fp_drop(p, newfd, fp, 1);
712 proc_fdunlock(p);
713
714 out:
715 if (error == 0 && ENTR_SHOULDTRACE) {
716 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
717 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
718 }
719
720 file_drop(fd);
721 return error;
722 }
723
724 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)725 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
726 {
727 __pthread_testcancel(1);
728 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
729 retval);
730 }
731
732 /*
733 * Returns: 0 Success
734 * EBADF Bad file descriptor
735 * EALREADY Connection already in progress
736 * EINPROGRESS Operation in progress
737 * ECONNABORTED Connection aborted
738 * EINTR Interrupted function
739 * EACCES Mandatory Access Control failure
740 * file_socket:ENOTSOCK
741 * file_socket:EBADF
742 * getsockaddr:ENAMETOOLONG Filename too long
743 * getsockaddr:EINVAL Invalid argument
744 * getsockaddr:ENOMEM Not enough space
745 * getsockaddr:EFAULT Bad address
746 * soconnectlock:EOPNOTSUPP
747 * soconnectlock:EISCONN
748 * soconnectlock:??? [depends on protocol, filters]
749 * msleep:EINTR
750 *
751 * Imputed: so_error error may be set from so_error, which
752 * may have been set by soconnectlock.
753 */
754 /* ARGSUSED */
755 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)756 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
757 {
758 __pthread_testcancel(1);
759 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
760 retval);
761 }
762
763 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)764 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
765 {
766 #pragma unused(p, retval)
767 socket_ref_t so;
768 struct sockaddr_storage ss;
769 sockaddr_ref_t sa = NULL;
770 int error;
771 int fd = uap->s;
772 boolean_t dgram;
773
774 AUDIT_ARG(fd, uap->s);
775 error = file_socket(fd, &so);
776 if (error != 0) {
777 return error;
778 }
779 if (so == NULL) {
780 error = EBADF;
781 goto out;
782 }
783
784 /*
785 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
786 * if this is a datagram socket; translate for other types.
787 */
788 dgram = (so->so_type == SOCK_DGRAM);
789
790 /* Get socket address now before we obtain socket lock */
791 if (uap->namelen > sizeof(ss)) {
792 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
793 } else {
794 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
795 if (error == 0) {
796 sa = SA(&ss);
797 }
798 }
799 if (error != 0) {
800 goto out;
801 }
802
803 error = connectit(so, sa);
804
805 if (sa != NULL && sa != SA(&ss)) {
806 free_sockaddr(sa);
807 }
808 if (error == ERESTART) {
809 error = EINTR;
810 }
811 out:
812 file_drop(fd);
813 return error;
814 }
815
816 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)817 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
818 {
819 #pragma unused(p, retval)
820 struct sockaddr_storage ss, sd;
821 sockaddr_ref_t src = NULL, dst = NULL;
822 socket_ref_t so;
823 int error, error1, fd = uap->socket;
824 boolean_t dgram;
825 sae_connid_t cid = SAE_CONNID_ANY;
826 struct user32_sa_endpoints ep32;
827 struct user64_sa_endpoints ep64;
828 struct user_sa_endpoints ep;
829 user_ssize_t bytes_written = 0;
830 struct user_iovec *iovp;
831 uio_t auio = NULL;
832
833 AUDIT_ARG(fd, uap->socket);
834 error = file_socket(fd, &so);
835 if (error != 0) {
836 return error;
837 }
838 if (so == NULL) {
839 error = EBADF;
840 goto out;
841 }
842
843 if (uap->endpoints == USER_ADDR_NULL) {
844 error = EINVAL;
845 goto out;
846 }
847
848 if (IS_64BIT_PROCESS(p)) {
849 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
850 if (error != 0) {
851 goto out;
852 }
853
854 ep.sae_srcif = ep64.sae_srcif;
855 ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
856 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
857 ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
858 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
859 } else {
860 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
861 if (error != 0) {
862 goto out;
863 }
864
865 ep.sae_srcif = ep32.sae_srcif;
866 ep.sae_srcaddr = ep32.sae_srcaddr;
867 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
868 ep.sae_dstaddr = ep32.sae_dstaddr;
869 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
870 }
871
872 /*
873 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
874 * if this is a datagram socket; translate for other types.
875 */
876 dgram = (so->so_type == SOCK_DGRAM);
877
878 /* Get socket address now before we obtain socket lock */
879 if (ep.sae_srcaddr != USER_ADDR_NULL) {
880 if (ep.sae_srcaddrlen > sizeof(ss)) {
881 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
882 } else {
883 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
884 if (error == 0) {
885 src = SA(&ss);
886 }
887 }
888
889 if (error) {
890 goto out;
891 }
892 }
893
894 if (ep.sae_dstaddr == USER_ADDR_NULL) {
895 error = EINVAL;
896 goto out;
897 }
898
899 /* Get socket address now before we obtain socket lock */
900 if (ep.sae_dstaddrlen > sizeof(sd)) {
901 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
902 } else {
903 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
904 if (error == 0) {
905 dst = SA(&sd);
906 }
907 }
908
909 if (error) {
910 goto out;
911 }
912
913 VERIFY(dst != NULL);
914
915 if (uap->iov != USER_ADDR_NULL) {
916 /* Verify range before calling uio_create() */
917 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
918 error = EINVAL;
919 goto out;
920 }
921
922 if (uap->len == USER_ADDR_NULL) {
923 error = EINVAL;
924 goto out;
925 }
926
927 /* allocate a uio to hold the number of iovecs passed */
928 auio = uio_create(uap->iovcnt, 0,
929 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
930 UIO_WRITE);
931
932 if (auio == NULL) {
933 error = ENOMEM;
934 goto out;
935 }
936
937 /*
938 * get location of iovecs within the uio.
939 * then copyin the iovecs from user space.
940 */
941 iovp = uio_iovsaddr_user(auio);
942 if (iovp == NULL) {
943 error = ENOMEM;
944 goto out;
945 }
946 error = copyin_user_iovec_array(uap->iov,
947 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
948 uap->iovcnt, iovp);
949 if (error != 0) {
950 goto out;
951 }
952
953 /* finish setup of uio_t */
954 error = uio_calculateresid_user(auio);
955 if (error != 0) {
956 goto out;
957 }
958 }
959
960 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
961 &cid, auio, uap->flags, &bytes_written);
962 if (error == ERESTART) {
963 error = EINTR;
964 }
965
966 if (uap->len != USER_ADDR_NULL) {
967 if (IS_64BIT_PROCESS(p)) {
968 error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
969 } else {
970 error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
971 }
972 /* give precedence to connectitx errors */
973 if ((error1 != 0) && (error == 0)) {
974 error = error1;
975 }
976 }
977
978 if (uap->connid != USER_ADDR_NULL) {
979 error1 = copyout(&cid, uap->connid, sizeof(cid));
980 /* give precedence to connectitx errors */
981 if ((error1 != 0) && (error == 0)) {
982 error = error1;
983 }
984 }
985 out:
986 file_drop(fd);
987 if (auio != NULL) {
988 uio_free(auio);
989 }
990 if (src != NULL && src != SA(&ss)) {
991 free_sockaddr(src);
992 }
993 if (dst != NULL && dst != SA(&sd)) {
994 free_sockaddr(dst);
995 }
996 return error;
997 }
998
999 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)1000 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
1001 {
1002 /*
1003 * Due to similiarity with a POSIX interface, define as
1004 * an unofficial cancellation point.
1005 */
1006 __pthread_testcancel(1);
1007 return connectx_nocancel(p, uap, retval);
1008 }
1009
1010 static int
connectit(struct socket * so,sockaddr_ref_t sa)1011 connectit(struct socket *so, sockaddr_ref_t sa)
1012 {
1013 int error;
1014
1015 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
1016 #if CONFIG_MACF_SOCKET_SUBSET
1017 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
1018 return error;
1019 }
1020 #endif /* MAC_SOCKET_SUBSET */
1021
1022 socket_lock(so, 1);
1023 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1024 error = EALREADY;
1025 goto out;
1026 }
1027 error = soconnectlock(so, sa, 0);
1028 if (error != 0) {
1029 goto out;
1030 }
1031 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1032 error = EINPROGRESS;
1033 goto out;
1034 }
1035 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1036 lck_mtx_t *mutex_held;
1037
1038 if (so->so_proto->pr_getlock != NULL) {
1039 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1040 } else {
1041 mutex_held = so->so_proto->pr_domain->dom_mtx;
1042 }
1043 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1044 PSOCK | PCATCH, __func__, 0);
1045 if (so->so_state & SS_DRAINING) {
1046 error = ECONNABORTED;
1047 }
1048 if (error != 0) {
1049 break;
1050 }
1051 }
1052 if (error == 0) {
1053 error = so->so_error;
1054 so->so_error = 0;
1055 }
1056 out:
1057 socket_unlock(so, 1);
1058 return error;
1059 }
1060
1061 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1062 connectitx(struct socket *so, sockaddr_ref_t src,
1063 sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1064 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1065 user_ssize_t *bytes_written)
1066 {
1067 int error;
1068
1069 VERIFY(dst != NULL);
1070
1071 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1072 #if CONFIG_MACF_SOCKET_SUBSET
1073 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1074 return error;
1075 }
1076
1077 if (auio != NULL) {
1078 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1079 return error;
1080 }
1081 }
1082 #endif /* MAC_SOCKET_SUBSET */
1083
1084 socket_lock(so, 1);
1085 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1086 error = EALREADY;
1087 goto out;
1088 }
1089
1090 error = soconnectxlocked(so, src, dst, p, ifscope,
1091 aid, pcid, flags, NULL, 0, auio, bytes_written);
1092 if (error != 0) {
1093 goto out;
1094 }
1095 /*
1096 * If, after the call to soconnectxlocked the flag is still set (in case
1097 * data has been queued and the connect() has actually been triggered,
1098 * it will have been unset by the transport), we exit immediately. There
1099 * is no reason to wait on any event.
1100 */
1101 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1102 error = 0;
1103 goto out;
1104 }
1105 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1106 error = EINPROGRESS;
1107 goto out;
1108 }
1109 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1110 lck_mtx_t *mutex_held;
1111
1112 if (so->so_proto->pr_getlock != NULL) {
1113 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1114 } else {
1115 mutex_held = so->so_proto->pr_domain->dom_mtx;
1116 }
1117 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1118 PSOCK | PCATCH, __func__, 0);
1119 if (so->so_state & SS_DRAINING) {
1120 error = ECONNABORTED;
1121 }
1122 if (error != 0) {
1123 break;
1124 }
1125 }
1126 if (error == 0) {
1127 error = so->so_error;
1128 so->so_error = 0;
1129 }
1130 out:
1131 socket_unlock(so, 1);
1132 return error;
1133 }
1134
1135 int
peeloff(proc_ref_t p,struct peeloff_args * uap,int * retval)1136 peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
1137 {
1138 #pragma unused(p, uap, retval)
1139 /*
1140 * Due to similiarity with a POSIX interface, define as
1141 * an unofficial cancellation point.
1142 */
1143 __pthread_testcancel(1);
1144 return 0;
1145 }
1146
1147 int
disconnectx(proc_ref_t p,struct disconnectx_args * uap,int * retval)1148 disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1149 {
1150 /*
1151 * Due to similiarity with a POSIX interface, define as
1152 * an unofficial cancellation point.
1153 */
1154 __pthread_testcancel(1);
1155 return disconnectx_nocancel(p, uap, retval);
1156 }
1157
1158 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1159 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1160 {
1161 #pragma unused(p, retval)
1162 socket_ref_t so;
1163 int fd = uap->s;
1164 int error;
1165
1166 error = file_socket(fd, &so);
1167 if (error != 0) {
1168 return error;
1169 }
1170 if (so == NULL) {
1171 error = EBADF;
1172 goto out;
1173 }
1174
1175 error = sodisconnectx(so, uap->aid, uap->cid);
1176 out:
1177 file_drop(fd);
1178 return error;
1179 }
1180
/*
 * socketpair system call: create two sockets of the requested
 * domain/type/protocol, give each a file descriptor, connect them to each
 * other and copy the two descriptors out to uap->rsv.
 *
 * Returns:	0			Success
 *	socreate:EAFNOSUPPORT
 *	socreate:EPROTOTYPE
 *	socreate:EPROTONOSUPPORT
 *	socreate:ENOBUFS
 *	socreate:ENOMEM
 *	socreate:EISCONN
 *	socreate:???			[other protocol families, IPSEC]
 *	falloc:ENFILE
 *	falloc:EMFILE
 *	falloc:ENOMEM
 *	copyout:EFAULT
 *	soconnect2:EINVAL
 *	soconnect2:EPROTOTYPE
 *	soconnect2:???			[other protocol families]
 */
int
socketpair(proc_ref_t p, struct socketpair_args *uap,
    __unused int32_ref_t retval)
{
	fileproc_ref_t fp1, fp2;
	socket_ref_t so1, so2;
	int fd, error, sv[2];

	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
	/* Create both endpoints first; nothing else to undo if the first fails. */
	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
	if (error) {
		return error;
	}
	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
	if (error) {
		goto free1;
	}

	/* Allocate a file/descriptor for the first socket and bind them together. */
	error = falloc(p, &fp1, &fd);
	if (error) {
		goto free2;
	}
	fp1->f_flag = FREAD | FWRITE;
	fp1->f_ops = &socketops;
	fp_set_data(fp1, so1);
	sv[0] = fd;

	/* Same for the second socket. */
	error = falloc(p, &fp2, &fd);
	if (error) {
		goto free3;
	}
	fp2->f_flag = FREAD | FWRITE;
	fp2->f_ops = &socketops;
	fp_set_data(fp2, so2);
	sv[1] = fd;

	error = soconnect2(so1, so2);
	if (error) {
		goto free4;
	}
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		error = soconnect2(so2, so1);
		if (error) {
			goto free4;
		}
	}

	/* Hand the descriptor pair to the caller before publishing the fds. */
	if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
		goto free4;
	}

	/*
	 * Success: under the fd lock, release both descriptor slots and drop
	 * the references on the two fileprocs.
	 * NOTE(review): presumably procfdtbl_releasefd() is what makes the
	 * descriptors visible to the process -- confirm against its definition.
	 */
	proc_fdlock(p);
	procfdtbl_releasefd(p, sv[0], NULL);
	procfdtbl_releasefd(p, sv[1], NULL);
	fp_drop(p, sv[0], fp1, 1);
	fp_drop(p, sv[1], fp2, 1);
	proc_fdunlock(p);

	return 0;

	/*
	 * Error unwind: each label undoes one acquisition and falls through
	 * to the labels below it, in reverse order of acquisition.
	 */
free4:
	fp_free(p, sv[1], fp2);
free3:
	fp_free(p, sv[0], fp1);
free2:
	(void) soclose(so2);
free1:
	(void) soclose(so1);
	return error;
}
1270
1271 /*
1272 * Returns: 0 Success
1273 * EINVAL
1274 * ENOBUFS
1275 * EBADF
1276 * EPIPE
1277 * EACCES Mandatory Access Control failure
1278 * file_socket:ENOTSOCK
1279 * file_socket:EBADF
1280 * getsockaddr:ENAMETOOLONG Filename too long
1281 * getsockaddr:EINVAL Invalid argument
1282 * getsockaddr:ENOMEM Not enough space
1283 * getsockaddr:EFAULT Bad address
1284 * <pru_sosend>:EACCES[TCP]
1285 * <pru_sosend>:EADDRINUSE[TCP]
1286 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1287 * <pru_sosend>:EAFNOSUPPORT[TCP]
1288 * <pru_sosend>:EAGAIN[TCP]
1289 * <pru_sosend>:EBADF
1290 * <pru_sosend>:ECONNRESET[TCP]
1291 * <pru_sosend>:EFAULT
1292 * <pru_sosend>:EHOSTUNREACH[TCP]
1293 * <pru_sosend>:EINTR
1294 * <pru_sosend>:EINVAL
1295 * <pru_sosend>:EISCONN[AF_INET]
1296 * <pru_sosend>:EMSGSIZE[TCP]
1297 * <pru_sosend>:ENETDOWN[TCP]
1298 * <pru_sosend>:ENETUNREACH[TCP]
1299 * <pru_sosend>:ENOBUFS
1300 * <pru_sosend>:ENOMEM[TCP]
1301 * <pru_sosend>:ENOTCONN[AF_INET]
1302 * <pru_sosend>:EOPNOTSUPP
1303 * <pru_sosend>:EPERM[TCP]
1304 * <pru_sosend>:EPIPE
1305 * <pru_sosend>:EWOULDBLOCK
1306 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1307 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1308 * <pru_sosend>:??? [value from so_error]
1309 * sockargs:???
1310 */
static int
sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
    int flags, int32_ref_t retval)
{
	/*
	 * Common send path: optionally copies in the destination address
	 * (mp->msg_name) and control data (mp->msg_control), runs the MAC
	 * send check when configured, then hands the data described by
	 * `uiop' to the protocol's pru_sosend.  On success *retval receives
	 * the number of bytes consumed from `uiop'.
	 */
	mbuf_ref_t control = NULL;
	struct sockaddr_storage ss;
	sockaddr_ref_t to = NULL;
	/* TRUE while `to' must be released with free_sockaddr() on exit. */
	boolean_t want_free = TRUE;
	int error;
	user_ssize_t len;

	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (mp->msg_name != USER_ADDR_NULL) {
		/*
		 * Addresses that fit in `ss' are read into that stack buffer
		 * (nothing to free afterwards); larger ones go through
		 * getsockaddr(), whose result is released at `bad'.
		 */
		if (mp->msg_namelen > sizeof(ss)) {
			error = getsockaddr(so, &to, mp->msg_name,
			    mp->msg_namelen, TRUE);
		} else {
			error = getsockaddr_s(so, &ss, mp->msg_name,
			    mp->msg_namelen, TRUE);
			if (error == 0) {
				to = SA(&ss);
				want_free = FALSE;
			}
		}
		if (error != 0) {
			/* Skips the `bad' cleanup: free_sockaddr() is not called on this path. */
			goto out;
		}
		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
	}
	if (mp->msg_control != USER_ADDR_NULL) {
		/* Control data must hold at least one cmsghdr. */
		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
			error = EINVAL;
			goto bad;
		}
		error = sockargs(&control, mp->msg_control,
		    mp->msg_controllen, MT_CONTROL);
		if (error != 0) {
			goto bad;
		}
	}

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * We check the state without holding the socket lock;
	 * if a race condition occurs, it would simply result
	 * in an extra call to the MAC check function.
	 */
	if (to != NULL &&
	    !(so->so_state & SS_DEFUNCT) &&
	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
		/* pru_sosend will not run, so the control mbuf is freed here. */
		if (control != NULL) {
			m_freem(control);
		}

		goto bad;
	}
#endif /* MAC_SOCKET_SUBSET */

	/* Remember the starting residual so progress can be measured afterwards. */
	len = uio_resid(uiop);
	/*
	 * NOTE(review): `control' is not freed in this function after this
	 * call, so pru_sosend presumably takes ownership of it -- confirm.
	 */
	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
	    control, flags);
	if (error != 0) {
		/*
		 * If some data was transferred before an interruption or a
		 * would-block condition, report the partial send as success.
		 */
		if (uio_resid(uiop) != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK)) {
			error = 0;
		}
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
		    !(flags & MSG_NOSIGNAL)) {
			psignal(p, SIGPIPE);
		}
	}
	if (error == 0) {
		/* Bytes sent = starting residual minus what is left in the uio. */
		*retval = (int)(len - uio_resid(uiop));
	}
bad:
	if (want_free) {
		free_sockaddr(to);
	}
out:
	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);

	return error;
}
1396
1397 /*
1398 * Returns: 0 Success
1399 * ENOMEM
1400 * sendit:??? [see sendit definition in this file]
1401 * write:??? [4056224: applicable for pipes]
1402 */
1403 int
sendto(proc_ref_t p,struct sendto_args * uap,int32_ref_t retval)1404 sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1405 {
1406 __pthread_testcancel(1);
1407 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1408 }
1409
1410 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1411 sendto_nocancel(proc_ref_t p,
1412 struct sendto_nocancel_args *uap,
1413 int32_ref_t retval)
1414 {
1415 struct user_msghdr msg;
1416 int error;
1417 uio_t auio = NULL;
1418 socket_ref_t so;
1419
1420 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1421 AUDIT_ARG(fd, uap->s);
1422
1423 if (uap->flags & MSG_SKIPCFIL) {
1424 error = EPERM;
1425 goto done;
1426 }
1427
1428 if (uap->len > LONG_MAX) {
1429 error = EINVAL;
1430 goto done;
1431 }
1432
1433 auio = uio_create(1, 0,
1434 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1435 UIO_WRITE);
1436 if (auio == NULL) {
1437 error = ENOMEM;
1438 goto done;
1439 }
1440 uio_addiov(auio, uap->buf, uap->len);
1441
1442 msg.msg_name = uap->to;
1443 msg.msg_namelen = uap->tolen;
1444 /* no need to set up msg_iov. sendit uses uio_t we send it */
1445 msg.msg_iov = 0;
1446 msg.msg_iovlen = 0;
1447 msg.msg_control = 0;
1448 msg.msg_flags = 0;
1449
1450 error = file_socket(uap->s, &so);
1451 if (error) {
1452 goto done;
1453 }
1454
1455 if (so == NULL) {
1456 error = EBADF;
1457 } else {
1458 error = sendit(p, so, &msg, auio, uap->flags, retval);
1459 }
1460
1461 file_drop(uap->s);
1462 done:
1463 if (auio != NULL) {
1464 uio_free(auio);
1465 }
1466
1467 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1468
1469 return error;
1470 }
1471
1472 /*
1473 * Returns: 0 Success
1474 * ENOBUFS
1475 * copyin:EFAULT
1476 * sendit:??? [see sendit definition in this file]
1477 */
1478 int
sendmsg(proc_ref_t p,struct sendmsg_args * uap,int32_ref_t retval)1479 sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1480 {
1481 __pthread_testcancel(1);
1482 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1483 retval);
1484 }
1485
1486 int
sendmsg_nocancel(proc_ref_t p,struct sendmsg_nocancel_args * uap,int32_ref_t retval)1487 sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1488 int32_ref_t retval)
1489 {
1490 struct user32_msghdr msg32;
1491 struct user64_msghdr msg64;
1492 struct user_msghdr user_msg;
1493 caddr_t msghdrp;
1494 int size_of_msghdr;
1495 int error;
1496 uio_t auio = NULL;
1497 struct user_iovec *iovp;
1498 socket_ref_t so;
1499
1500 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1501
1502 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1503 AUDIT_ARG(fd, uap->s);
1504
1505 if (uap->flags & MSG_SKIPCFIL) {
1506 error = EPERM;
1507 goto done;
1508 }
1509
1510 if (is_p_64bit_process) {
1511 msghdrp = (caddr_t)&msg64;
1512 size_of_msghdr = sizeof(msg64);
1513 } else {
1514 msghdrp = (caddr_t)&msg32;
1515 size_of_msghdr = sizeof(msg32);
1516 }
1517 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1518 if (error) {
1519 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1520 return error;
1521 }
1522
1523 if (is_p_64bit_process) {
1524 user_msg.msg_flags = msg64.msg_flags;
1525 user_msg.msg_controllen = msg64.msg_controllen;
1526 user_msg.msg_control = (user_addr_t)msg64.msg_control;
1527 user_msg.msg_iovlen = msg64.msg_iovlen;
1528 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1529 user_msg.msg_namelen = msg64.msg_namelen;
1530 user_msg.msg_name = (user_addr_t)msg64.msg_name;
1531 } else {
1532 user_msg.msg_flags = msg32.msg_flags;
1533 user_msg.msg_controllen = msg32.msg_controllen;
1534 user_msg.msg_control = msg32.msg_control;
1535 user_msg.msg_iovlen = msg32.msg_iovlen;
1536 user_msg.msg_iov = msg32.msg_iov;
1537 user_msg.msg_namelen = msg32.msg_namelen;
1538 user_msg.msg_name = msg32.msg_name;
1539 }
1540
1541 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1542 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1543 0, 0, 0, 0);
1544 return EMSGSIZE;
1545 }
1546
1547 /* allocate a uio large enough to hold the number of iovecs passed */
1548 auio = uio_create(user_msg.msg_iovlen, 0,
1549 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1550 UIO_WRITE);
1551 if (auio == NULL) {
1552 error = ENOBUFS;
1553 goto done;
1554 }
1555
1556 if (user_msg.msg_iovlen) {
1557 /*
1558 * get location of iovecs within the uio.
1559 * then copyin the iovecs from user space.
1560 */
1561 iovp = uio_iovsaddr_user(auio);
1562 if (iovp == NULL) {
1563 error = ENOBUFS;
1564 goto done;
1565 }
1566 error = copyin_user_iovec_array(user_msg.msg_iov,
1567 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1568 user_msg.msg_iovlen, iovp);
1569 if (error) {
1570 goto done;
1571 }
1572 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1573
1574 /* finish setup of uio_t */
1575 error = uio_calculateresid_user(auio);
1576 if (error) {
1577 goto done;
1578 }
1579 } else {
1580 user_msg.msg_iov = 0;
1581 }
1582
1583 /* msg_flags is ignored for send */
1584 user_msg.msg_flags = 0;
1585
1586 error = file_socket(uap->s, &so);
1587 if (error) {
1588 goto done;
1589 }
1590 if (so == NULL) {
1591 error = EBADF;
1592 } else {
1593 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1594 }
1595 file_drop(uap->s);
1596 done:
1597 if (auio != NULL) {
1598 uio_free(auio);
1599 }
1600 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1601
1602 return error;
1603 }
1604
1605 static int
internalize_user_msg_x(struct user_msghdr * user_msg,uio_t * auiop,proc_ref_t p,void_ptr_t user_msghdr_x_src)1606 internalize_user_msg_x(struct user_msghdr *user_msg, uio_t *auiop, proc_ref_t p, void_ptr_t user_msghdr_x_src)
1607 {
1608 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1609 uio_t auio = *auiop;
1610 int error;
1611
1612 if (is_p_64bit_process) {
1613 struct user64_msghdr_x msghdrx64;
1614
1615 error = copyin((user_addr_t)user_msghdr_x_src,
1616 &msghdrx64, sizeof(msghdrx64));
1617 if (error != 0) {
1618 DBG_PRINTF("%s copyin() msghdrx64 failed %d",
1619 __func__, error);
1620 goto done;
1621 }
1622 user_msg->msg_name = msghdrx64.msg_name;
1623 user_msg->msg_namelen = msghdrx64.msg_namelen;
1624 user_msg->msg_iov = msghdrx64.msg_iov;
1625 user_msg->msg_iovlen = msghdrx64.msg_iovlen;
1626 user_msg->msg_control = msghdrx64.msg_control;
1627 user_msg->msg_controllen = msghdrx64.msg_controllen;
1628 } else {
1629 struct user32_msghdr_x msghdrx32;
1630
1631 error = copyin((user_addr_t)user_msghdr_x_src,
1632 &msghdrx32, sizeof(msghdrx32));
1633 if (error != 0) {
1634 DBG_PRINTF("%s copyin() msghdrx32 failed %d",
1635 __func__, error);
1636 goto done;
1637 }
1638 user_msg->msg_name = msghdrx32.msg_name;
1639 user_msg->msg_namelen = msghdrx32.msg_namelen;
1640 user_msg->msg_iov = msghdrx32.msg_iov;
1641 user_msg->msg_iovlen = msghdrx32.msg_iovlen;
1642 user_msg->msg_control = msghdrx32.msg_control;
1643 user_msg->msg_controllen = msghdrx32.msg_controllen;
1644 }
1645 /* msg_flags is ignored for send */
1646 user_msg->msg_flags = 0;
1647
1648 if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
1649 error = EMSGSIZE;
1650 DBG_PRINTF("%s bad msg_iovlen, error %d",
1651 __func__, error);
1652 goto done;
1653 }
1654 /*
1655 * Attempt to reuse the uio if large enough, otherwise we need
1656 * a new one
1657 */
1658 if (auio != NULL) {
1659 if (auio->uio_max_iovs >= user_msg->msg_iovlen) {
1660 uio_reset_fast(auio, 0,
1661 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1662 UIO_WRITE);
1663 } else {
1664 uio_free(auio);
1665 auio = NULL;
1666 }
1667 }
1668 if (auio == NULL) {
1669 auio = uio_create(user_msg->msg_iovlen, 0,
1670 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1671 UIO_WRITE);
1672 if (auio == NULL) {
1673 error = ENOBUFS;
1674 DBG_PRINTF("%s uio_create() failed %d",
1675 __func__, error);
1676 goto done;
1677 }
1678 }
1679
1680 if (user_msg->msg_iovlen) {
1681 /*
1682 * get location of iovecs within the uio.
1683 * then copyin the iovecs from user space.
1684 */
1685 struct user_iovec *iovp = uio_iovsaddr_user(auio);
1686 if (iovp == NULL) {
1687 error = ENOBUFS;
1688 goto done;
1689 }
1690 error = copyin_user_iovec_array(user_msg->msg_iov,
1691 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1692 user_msg->msg_iovlen, iovp);
1693 if (error != 0) {
1694 goto done;
1695 }
1696 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
1697
1698 /* finish setup of uio_t */
1699 error = uio_calculateresid_user(auio);
1700 if (error) {
1701 goto done;
1702 }
1703 } else {
1704 user_msg->msg_iov = 0;
1705 }
1706
1707 done:
1708 *auiop = auio;
1709 return error;
1710 }
1711
1712 static int
mbuf_packet_from_uio(socket_ref_t so,mbuf_ref_ref_t mp,uio_t auio)1713 mbuf_packet_from_uio(socket_ref_t so, mbuf_ref_ref_t mp, uio_t auio)
1714 {
1715 int error = 0;
1716 uint16_t headroom = 0;
1717 size_t bytes_to_alloc;
1718 mbuf_ref_t top = NULL, m;
1719
1720 if (soreserveheadroom != 0) {
1721 headroom = so->so_pktheadroom;
1722 }
1723 bytes_to_alloc = headroom + uio_resid(auio);
1724
1725 error = mbuf_allocpacket(MBUF_WAITOK, bytes_to_alloc, NULL, &top);
1726 if (error != 0) {
1727 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: mbuf_allocpacket %zu error %d",
1728 bytes_to_alloc, error);
1729 goto done;
1730 }
1731
1732 if (headroom > 0 && headroom < mbuf_maxlen(top)) {
1733 top->m_data += headroom;
1734 }
1735
1736 for (m = top; m != NULL; m = m->m_next) {
1737 int bytes_to_copy = (int)uio_resid(auio);
1738 ssize_t mlen;
1739
1740 if ((m->m_flags & M_EXT)) {
1741 mlen = m->m_ext.ext_size -
1742 M_LEADINGSPACE(m);
1743 } else if ((m->m_flags & M_PKTHDR)) {
1744 mlen = MHLEN - M_LEADINGSPACE(m);
1745 m_add_crumb(m, PKT_CRUMB_SOSEND);
1746 } else {
1747 mlen = MLEN - M_LEADINGSPACE(m);
1748 }
1749 int len = imin((int)mlen, bytes_to_copy);
1750
1751 error = uio_copyin_user(mtod(m, caddr_t), (int)len, auio);
1752 if (error != 0) {
1753 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
1754 len, error);
1755 goto done;
1756 }
1757 m->m_len = len;
1758 top->m_pkthdr.len += len;
1759 }
1760
1761 done:
1762 if (error != 0) {
1763 m_freem(top);
1764 } else {
1765 *mp = top;
1766 }
1767 return error;
1768 }
1769
1770 static int
sendit_x(proc_ref_t p,socket_ref_t so,struct sendmsg_x_args * uap,u_int * retval)1771 sendit_x(proc_ref_t p, socket_ref_t so, struct sendmsg_x_args *uap, u_int *retval)
1772 {
1773 int error = 0;
1774 uio_t __single auio = NULL;
1775 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1776 void *src;
1777 MBUFQ_HEAD() pktlist = {};
1778 size_t total_pkt_len = 0;
1779 u_int pkt_cnt = 0;
1780 int flags = uap->flags;
1781 mbuf_ref_t top;
1782
1783 MBUFQ_INIT(&pktlist);
1784
1785 *retval = 0;
1786
1787 /* We re-use the uio when possible */
1788 auio = uio_create(1, 0,
1789 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1790 UIO_WRITE);
1791 if (auio == NULL) {
1792 error = ENOBUFS;
1793 DBG_PRINTF("%s uio_create() failed %d",
1794 __func__, error);
1795 goto done;
1796 }
1797
1798 src = __unsafe_forge_bidi_indexable(void *, uap->msgp, uap->cnt);
1799
1800 /*
1801 * Create a list of packets
1802 */
1803 for (u_int i = 0; i < uap->cnt; i++) {
1804 struct user_msghdr user_msg = {};
1805 mbuf_ref_t m = NULL;
1806
1807 if (is_p_64bit_process) {
1808 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
1809 if (error != 0) {
1810 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1811 goto done;
1812 }
1813 } else {
1814 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
1815 if (error != 0) {
1816 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1817 goto done;
1818 }
1819 }
1820 /*
1821 * Stop on the first datagram that is too large
1822 */
1823 if (uio_resid(auio) > so->so_snd.sb_hiwat) {
1824 if (i == 0) {
1825 error = EMSGSIZE;
1826 goto done;
1827 }
1828 break;
1829 }
1830 /*
1831 * An mbuf packet has the control mbuf(s) followed by data
1832 * We allocate the mbufs in reverse order
1833 */
1834 error = mbuf_packet_from_uio(so, &m, auio);
1835 if (error != 0) {
1836 os_log(OS_LOG_DEFAULT, "sendit_x: mbuf_packet_from_uio error %d\n", error);
1837 goto done;
1838 }
1839 total_pkt_len += m->m_pkthdr.len;
1840
1841 if (user_msg.msg_control != USER_ADDR_NULL && user_msg.msg_controllen != 0) {
1842 mbuf_ref_t control = NULL;
1843
1844 error = sockargs(&control, user_msg.msg_control, user_msg.msg_controllen, MT_CONTROL);
1845 if (error != 0) {
1846 os_log(OS_LOG_DEFAULT, "sendit_x: sockargs error %d\n", error);
1847 goto done;
1848 }
1849 control->m_next = m;
1850 m = control;
1851 }
1852 MBUFQ_ENQUEUE(&pktlist, m);
1853
1854 pkt_cnt += 1;
1855 }
1856
1857 top = MBUFQ_FIRST(&pktlist);
1858 MBUFQ_INIT(&pktlist);
1859 error = sosend_list(so, top, total_pkt_len, &pkt_cnt, flags);
1860 if (error != 0 && error != ENOBUFS) {
1861 os_log(OS_LOG_DEFAULT, "sendit_x: sosend_list error %d\n", error);
1862 }
1863 done:
1864 *retval = pkt_cnt;
1865
1866 if (auio != NULL) {
1867 uio_free(auio);
1868 }
1869 MBUFQ_DRAIN(&pktlist);
1870 return error;
1871 }
1872
/*
 * sendmsg_x system call: send up to uap->cnt messages described by the
 * user msghdr_x array at uap->msgp on socket uap->s.
 *
 * Connected atomic datagram sockets use the batched sendit_x() path
 * (unless sendmsg_x_mode is 1); every other socket sends the messages one
 * at a time through sendit().  On success *retval is the number of
 * messages sent.
 */
int
sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
{
	void *src;
	int error;
	uio_t __single auio = NULL;
	socket_ref_t so;
	/* Number of messages sent so far. */
	u_int uiocnt = 0;
	const bool is_p_64bit_process = IS_64BIT_PROCESS(p);

	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
	AUDIT_ARG(fd, uap->s);

	/* MSG_SKIPCFIL may not be requested through this system call. */
	if (uap->flags & MSG_SKIPCFIL) {
		error = EPERM;
		goto done_no_filedrop;
	}

	error = file_socket(uap->s, &so);
	if (error) {
		goto done_no_filedrop;
	}
	if (so == NULL) {
		error = EBADF;
		goto done;
	}

	/*
	 * For an atomic datagram connected socket we can build the list of
	 * mbuf packets with sosend_list()
	 */
	if (so->so_type == SOCK_DGRAM && sosendallatonce(so) &&
	    (so->so_state & SS_ISCONNECTED) && sendmsg_x_mode != 1) {
		error = sendit_x(p, so, uap, &uiocnt);
		if (error != 0) {
			DBG_PRINTF("%s sendit_x() failed %d",
			    __func__, error);
		}
		goto done;
	}

	src = __unsafe_forge_bidi_indexable(void *, uap->msgp, uap->cnt);

	/* We re-use the uio when possible */
	auio = uio_create(1, 0,
	    (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
	    UIO_WRITE);
	if (auio == NULL) {
		error = ENOBUFS;
		DBG_PRINTF("%s uio_create() failed %d",
		    __func__, error);
		goto done;
	}

	/* Generic path: internalize and send each message in turn. */
	for (u_int i = 0; i < uap->cnt; i++) {
		struct user_msghdr user_msg = {};

		/* Index the user array with the element size of the caller's ABI. */
		if (is_p_64bit_process) {
			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
			if (error != 0) {
				goto done;
			}
		} else {
			error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
			if (error != 0) {
				goto done;
			}
		}

		/* Per-message byte count from sendit(); not used further here. */
		int32_t len = 0;
		error = sendit(p, so, &user_msg, auio, uap->flags, &len);
		if (error != 0) {
			break;
		}
		uiocnt += 1;
	}
done:
	if (error != 0) {
		/*
		 * If at least one message went out, these errors are
		 * suppressed so the caller sees the partial count instead.
		 */
		if (uiocnt != 0 && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK ||
		    error == ENOBUFS || error == EMSGSIZE)) {
			error = 0;
		}
		/* Generation of SIGPIPE can be controlled per socket */
		/*
		 * `so' may be NULL here (EBADF path); the error == EPIPE test
		 * comes first, so `so' is not dereferenced in that case.
		 */
		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
		    !(uap->flags & MSG_NOSIGNAL)) {
			psignal(p, SIGPIPE);
		}
	}
	if (error == 0) {
		*retval = (int)(uiocnt);
	}
	/* Balance the successful file_socket() lookup. */
	file_drop(uap->s);

done_no_filedrop:
	if (auio != NULL) {
		uio_free(auio);
	}
	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);

	return error;
}
1975
1976
1977 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)1978 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
1979 {
1980 int error = 0;
1981 socklen_t sa_len = 0;
1982 ssize_t len;
1983
1984 len = *namelen;
1985 if (len <= 0 || fromsa == 0) {
1986 len = 0;
1987 } else {
1988 #ifndef MIN
1989 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1990 #endif
1991 sa_len = fromsa->sa_len;
1992 len = MIN((unsigned int)len, sa_len);
1993 error = copyout(__SA_UTILS_CONV_TO_BYTES(fromsa), name, (unsigned)len);
1994 if (error) {
1995 goto out;
1996 }
1997 }
1998 *namelen = sa_len;
1999 out:
2000 return 0;
2001 }
2002
2003 static int
copyout_maddr(struct mbuf * m,user_addr_t name,socklen_t * namelen)2004 copyout_maddr(struct mbuf *m, user_addr_t name, socklen_t *namelen)
2005 {
2006 int error = 0;
2007 socklen_t sa_len = 0;
2008 ssize_t len;
2009
2010 len = *namelen;
2011 if (len <= 0 || m == NULL) {
2012 len = 0;
2013 } else {
2014 #ifndef MIN
2015 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2016 #endif
2017 struct sockaddr *fromsa = mtod(m, struct sockaddr *);
2018
2019 sa_len = fromsa->sa_len;
2020 len = MIN((unsigned int)len, sa_len);
2021 error = copyout(fromsa, name, (unsigned)len);
2022 if (error != 0) {
2023 goto out;
2024 }
2025 }
2026 *namelen = sa_len;
2027 out:
2028 return 0;
2029 }
2030
2031 static int
copyout_control(proc_ref_t p,mbuf_ref_t m,user_addr_t control,socklen_ref_t controllen,int_ref_t flags,socket_ref_t so)2032 copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
2033 socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
2034 {
2035 int error = 0;
2036 socklen_t len;
2037 user_addr_t ctlbuf;
2038 struct inpcb *inp = NULL;
2039 bool want_pktinfo = false;
2040 bool seen_pktinfo = false;
2041
2042 if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
2043 inp = sotoinpcb(so);
2044 want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
2045 }
2046
2047 len = *controllen;
2048 *controllen = 0;
2049 ctlbuf = control;
2050
2051 while (m && len > 0) {
2052 socklen_t tocopy;
2053 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
2054 socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
2055 socklen_t buflen = m->m_len;
2056
2057 while (buflen > 0 && len > 0) {
2058 /*
2059 * SCM_TIMESTAMP hack because struct timeval has a
2060 * different size for 32 bits and 64 bits processes
2061 */
2062 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
2063 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
2064 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
2065 socklen_t tmp_space;
2066 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
2067
2068 tmp_cp->cmsg_level = SOL_SOCKET;
2069 tmp_cp->cmsg_type = SCM_TIMESTAMP;
2070
2071 if (proc_is64bit(p)) {
2072 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
2073
2074 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
2075 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
2076
2077 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
2078 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
2079 } else {
2080 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
2081
2082 tv32->tv_sec = (user32_time_t)tv->tv_sec;
2083 tv32->tv_usec = tv->tv_usec;
2084
2085 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
2086 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
2087 }
2088 if (len >= tmp_space) {
2089 tocopy = tmp_space;
2090 } else {
2091 *flags |= MSG_CTRUNC;
2092 tocopy = len;
2093 }
2094 error = copyout(tmp_buffer, ctlbuf, tocopy);
2095 if (error) {
2096 goto out;
2097 }
2098 } else {
2099 /* If socket has flow tracking and socket did not request address, ignore it */
2100 if (SOFLOW_ENABLED(so) &&
2101 ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
2102 !(inp->inp_flags & INP_RECVDSTADDR)) ||
2103 (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
2104 !(inp->inp_flags & IN6P_PKTINFO)))) {
2105 tocopy = 0;
2106 } else {
2107 if (cp_size > buflen) {
2108 panic("cp_size > buflen, something wrong with alignment!");
2109 }
2110 if (len >= cp_size) {
2111 tocopy = cp_size;
2112 } else {
2113 *flags |= MSG_CTRUNC;
2114 tocopy = len;
2115 }
2116 error = copyout((caddr_t) cp, ctlbuf, tocopy);
2117 if (error) {
2118 goto out;
2119 }
2120 if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
2121 (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
2122 seen_pktinfo = true;
2123 }
2124 }
2125 }
2126
2127
2128 ctlbuf += tocopy;
2129 len -= tocopy;
2130
2131 buflen -= cp_size;
2132 cp = (struct cmsghdr *)(void *)
2133 ((unsigned char *) cp + cp_size);
2134 cp_size = CMSG_ALIGN(cp->cmsg_len);
2135 }
2136
2137 m = m->m_next;
2138 }
2139 *controllen = (socklen_t)(ctlbuf - control);
2140 out:
2141 if (want_pktinfo && !seen_pktinfo) {
2142 missingpktinfo += 1;
2143 #if (DEBUG || DEVELOPMENT)
2144 char pname[MAXCOMLEN];
2145 char local[MAX_IPv6_STR_LEN + 6];
2146 char remote[MAX_IPv6_STR_LEN + 6];
2147
2148 proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
2149 if (inp->inp_vflag & INP_IPV6) {
2150 inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
2151 inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
2152 } else {
2153 inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
2154 inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
2155 }
2156
2157 os_log(OS_LOG_DEFAULT,
2158 "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
2159 local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
2160 pname, so->last_pid, error);
2161 #endif /* (DEBUG || DEVELOPMENT) */
2162 }
2163 return error;
2164 }
2165
2166 /*
2167 * Returns: 0 Success
2168 * ENOTSOCK
2169 * EINVAL
2170 * EBADF
2171 * EACCES Mandatory Access Control failure
2172 * copyout:EFAULT
2173 * fp_lookup:EBADF
2174 * <pru_soreceive>:ENOBUFS
2175 * <pru_soreceive>:ENOTCONN
2176 * <pru_soreceive>:EWOULDBLOCK
2177 * <pru_soreceive>:EFAULT
2178 * <pru_soreceive>:EINTR
2179 * <pru_soreceive>:EBADF
2180 * <pru_soreceive>:EINVAL
2181 * <pru_soreceive>:EMSGSIZE
2182 * <pru_soreceive>:???
2183 *
2184 * Notes: Additional return values from calls through <pru_soreceive>
2185 * depend on protocols other than TCP or AF_UNIX, which are
2186 * documented above.
2187 */
2188 static int
recvit(proc_ref_t p,int s,user_msghdr_ref_t mp,uio_t uiop,user_addr_t namelenp,int32_ref_t retval)2189 recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
2190 user_addr_t namelenp, int32_ref_t retval)
2191 {
2192 ssize_t len;
2193 int error;
2194 mbuf_ref_t control = 0;
2195 socket_ref_t so;
2196 sockaddr_ref_t fromsa = 0;
2197 fileproc_ref_t fp;
2198
2199 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2200 if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
2201 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2202 return error;
2203 }
2204 so = (struct socket *)fp_get_data(fp);
2205
2206 #if CONFIG_MACF_SOCKET_SUBSET
2207 /*
2208 * We check the state without holding the socket lock;
2209 * if a race condition occurs, it would simply result
2210 * in an extra call to the MAC check function.
2211 */
2212 if (!(so->so_state & SS_DEFUNCT) &&
2213 !(so->so_state & SS_ISCONNECTED) &&
2214 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2215 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2216 goto out1;
2217 }
2218 #endif /* MAC_SOCKET_SUBSET */
2219 if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
2220 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
2221 error = EINVAL;
2222 goto out1;
2223 }
2224
2225 len = uio_resid(uiop);
2226 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2227 NULL, mp->msg_control ? &control : NULL,
2228 &mp->msg_flags);
2229 if (fromsa) {
2230 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2231 fromsa);
2232 }
2233 if (error) {
2234 if (uio_resid(uiop) != len && (error == ERESTART ||
2235 error == EINTR || error == EWOULDBLOCK)) {
2236 error = 0;
2237 }
2238 }
2239 if (error) {
2240 goto out;
2241 }
2242
2243 *retval = (int32_t)(len - uio_resid(uiop));
2244
2245 if (mp->msg_name) {
2246 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
2247 if (error) {
2248 goto out;
2249 }
2250 /* return the actual, untruncated address length */
2251 if (namelenp &&
2252 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2253 sizeof(int)))) {
2254 goto out;
2255 }
2256 }
2257
2258 if (mp->msg_control) {
2259 error = copyout_control(p, control, mp->msg_control,
2260 &mp->msg_controllen, &mp->msg_flags, so);
2261 }
2262 out:
2263 free_sockaddr(fromsa);
2264 if (control) {
2265 m_freem(control);
2266 }
2267 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2268 out1:
2269 fp_drop(p, s, fp, 0);
2270 return error;
2271 }
2272
2273 /*
2274 * Returns: 0 Success
2275 * ENOMEM
2276 * copyin:EFAULT
2277 * recvit:???
2278 * read:??? [4056224: applicable for pipes]
2279 *
2280 * Notes: The read entry point is only called as part of support for
2281 * binary backward compatability; new code should use read
2282 * instead of recv or recvfrom when attempting to read data
2283 * from pipes.
2284 *
2285 * For full documentation of the return codes from recvit, see
2286 * the block header for the recvit function.
2287 */
2288 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2289 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2290 {
2291 __pthread_testcancel(1);
2292 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2293 retval);
2294 }
2295
2296 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2297 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2298 int32_ref_t retval)
2299 {
2300 struct user_msghdr msg;
2301 int error;
2302 uio_t __single auio = NULL;
2303
2304 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2305 AUDIT_ARG(fd, uap->s);
2306
2307 if (uap->fromlenaddr) {
2308 error = copyin(uap->fromlenaddr,
2309 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2310 if (error) {
2311 return error;
2312 }
2313 } else {
2314 msg.msg_namelen = 0;
2315 }
2316 msg.msg_name = uap->from;
2317 auio = uio_create(1, 0,
2318 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2319 UIO_READ);
2320 if (auio == NULL) {
2321 return ENOMEM;
2322 }
2323
2324 uio_addiov(auio, uap->buf, uap->len);
2325 /* no need to set up msg_iov. recvit uses uio_t we send it */
2326 msg.msg_iov = 0;
2327 msg.msg_iovlen = 0;
2328 msg.msg_control = 0;
2329 msg.msg_controllen = 0;
2330 msg.msg_flags = uap->flags;
2331 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2332 if (auio != NULL) {
2333 uio_free(auio);
2334 }
2335
2336 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2337
2338 return error;
2339 }
2340
2341 /*
2342 * Returns: 0 Success
2343 * EMSGSIZE
2344 * ENOMEM
2345 * copyin:EFAULT
2346 * copyout:EFAULT
2347 * recvit:???
2348 *
2349 * Notes: For full documentation of the return codes from recvit, see
2350 * the block header for the recvit function.
2351 */
2352 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2353 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2354 {
2355 __pthread_testcancel(1);
2356 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2357 retval);
2358 }
2359
2360 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2361 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2362 int32_ref_t retval)
2363 {
2364 struct user32_msghdr msg32;
2365 struct user64_msghdr msg64;
2366 struct user_msghdr user_msg;
2367 caddr_t msghdrp;
2368 int size_of_msghdr;
2369 user_addr_t uiov;
2370 int error;
2371 uio_t __single auio = NULL;
2372 struct user_iovec *iovp;
2373
2374 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2375
2376 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2377 AUDIT_ARG(fd, uap->s);
2378 if (is_p_64bit_process) {
2379 msghdrp = (caddr_t)&msg64;
2380 size_of_msghdr = sizeof(msg64);
2381 } else {
2382 msghdrp = (caddr_t)&msg32;
2383 size_of_msghdr = sizeof(msg32);
2384 }
2385 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2386 if (error) {
2387 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2388 return error;
2389 }
2390
2391 /* only need to copy if user process is not 64-bit */
2392 if (is_p_64bit_process) {
2393 user_msg.msg_flags = msg64.msg_flags;
2394 user_msg.msg_controllen = msg64.msg_controllen;
2395 user_msg.msg_control = (user_addr_t)msg64.msg_control;
2396 user_msg.msg_iovlen = msg64.msg_iovlen;
2397 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2398 user_msg.msg_namelen = msg64.msg_namelen;
2399 user_msg.msg_name = (user_addr_t)msg64.msg_name;
2400 } else {
2401 user_msg.msg_flags = msg32.msg_flags;
2402 user_msg.msg_controllen = msg32.msg_controllen;
2403 user_msg.msg_control = msg32.msg_control;
2404 user_msg.msg_iovlen = msg32.msg_iovlen;
2405 user_msg.msg_iov = msg32.msg_iov;
2406 user_msg.msg_namelen = msg32.msg_namelen;
2407 user_msg.msg_name = msg32.msg_name;
2408 }
2409
2410 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2411 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2412 0, 0, 0, 0);
2413 return EMSGSIZE;
2414 }
2415
2416 user_msg.msg_flags = uap->flags;
2417
2418 /* allocate a uio large enough to hold the number of iovecs passed */
2419 auio = uio_create(user_msg.msg_iovlen, 0,
2420 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2421 UIO_READ);
2422 if (auio == NULL) {
2423 error = ENOMEM;
2424 goto done;
2425 }
2426
2427 /*
2428 * get location of iovecs within the uio. then copyin the iovecs from
2429 * user space.
2430 */
2431 iovp = uio_iovsaddr_user(auio);
2432 if (iovp == NULL) {
2433 error = ENOMEM;
2434 goto done;
2435 }
2436 uiov = user_msg.msg_iov;
2437 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2438 error = copyin_user_iovec_array(uiov,
2439 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2440 user_msg.msg_iovlen, iovp);
2441 if (error) {
2442 goto done;
2443 }
2444
2445 /* finish setup of uio_t */
2446 error = uio_calculateresid_user(auio);
2447 if (error) {
2448 goto done;
2449 }
2450
2451 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2452 if (!error) {
2453 user_msg.msg_iov = uiov;
2454 if (is_p_64bit_process) {
2455 msg64.msg_flags = user_msg.msg_flags;
2456 msg64.msg_controllen = user_msg.msg_controllen;
2457 msg64.msg_control = user_msg.msg_control;
2458 msg64.msg_iovlen = user_msg.msg_iovlen;
2459 msg64.msg_iov = user_msg.msg_iov;
2460 msg64.msg_namelen = user_msg.msg_namelen;
2461 msg64.msg_name = user_msg.msg_name;
2462 } else {
2463 msg32.msg_flags = user_msg.msg_flags;
2464 msg32.msg_controllen = user_msg.msg_controllen;
2465 msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2466 msg32.msg_iovlen = user_msg.msg_iovlen;
2467 msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2468 msg32.msg_namelen = user_msg.msg_namelen;
2469 msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2470 }
2471 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2472 }
2473 done:
2474 if (auio != NULL) {
2475 uio_free(auio);
2476 }
2477 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2478 return error;
2479 }
2480
2481 __attribute__((noinline))
2482 static int
recvmsg_x_array(proc_ref_t p,socket_ref_t so,struct recvmsg_x_args * uap,user_ssize_t * retval)2483 recvmsg_x_array(proc_ref_t p, socket_ref_t so, struct recvmsg_x_args *uap, user_ssize_t *retval)
2484 {
2485 int error = EOPNOTSUPP;
2486 user_msghdr_x_ptr_t user_msg_x = NULL;
2487 recv_msg_elem_ptr_t recv_msg_array = NULL;
2488 user_ssize_t len_before = 0, len_after;
2489 size_t size_of_msghdr;
2490 void_ptr_t umsgp = NULL;
2491 u_int i;
2492 u_int uiocnt;
2493 int flags = uap->flags;
2494
2495 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2496
2497 size_of_msghdr = is_p_64bit_process ?
2498 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2499
2500 /*
2501 * Support only a subset of message flags
2502 */
2503 if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | MSG_NBIO)) {
2504 return EOPNOTSUPP;
2505 }
2506 /*
2507 * Input parameter range check
2508 */
2509 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2510 error = EINVAL;
2511 goto out;
2512 }
2513 if (uap->cnt > somaxrecvmsgx) {
2514 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2515 }
2516
2517 user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
2518 Z_WAITOK | Z_ZERO);
2519 if (user_msg_x == NULL) {
2520 DBG_PRINTF("%s user_msg_x alloc failed", __func__);
2521 error = ENOMEM;
2522 goto out;
2523 }
2524 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2525 if (recv_msg_array == NULL) {
2526 DBG_PRINTF("%s alloc_recv_msg_array() failed", __func__);
2527 error = ENOMEM;
2528 goto out;
2529 }
2530
2531 umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2532 if (umsgp == NULL) {
2533 DBG_PRINTF("%s umsgp alloc failed", __func__);
2534 error = ENOMEM;
2535 goto out;
2536 }
2537 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2538 if (error) {
2539 DBG_PRINTF("%s copyin() failed", __func__);
2540 goto out;
2541 }
2542 error = internalize_recv_msghdr_array(umsgp,
2543 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2544 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2545 if (error) {
2546 DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
2547 goto out;
2548 }
2549 /*
2550 * Make sure the size of each message iovec and
2551 * the aggregate size of all the iovec is valid
2552 */
2553 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2554 error = EINVAL;
2555 goto out;
2556 }
2557 /*
2558 * Sanity check on passed arguments
2559 */
2560 for (i = 0; i < uap->cnt; i++) {
2561 struct user_msghdr_x *mp = user_msg_x + i;
2562
2563 if (mp->msg_flags != 0) {
2564 error = EINVAL;
2565 goto out;
2566 }
2567 }
2568 #if CONFIG_MACF_SOCKET_SUBSET
2569 /*
2570 * We check the state without holding the socket lock;
2571 * if a race condition occurs, it would simply result
2572 * in an extra call to the MAC check function.
2573 */
2574 if (!(so->so_state & SS_DEFUNCT) &&
2575 !(so->so_state & SS_ISCONNECTED) &&
2576 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2577 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2578 goto out;
2579 }
2580 #endif /* MAC_SOCKET_SUBSET */
2581
2582 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2583
2584 for (i = 0; i < uap->cnt; i++) {
2585 struct recv_msg_elem *recv_msg_elem;
2586 uio_t auio;
2587 sockaddr_ref_ref_t psa;
2588 struct mbuf **controlp;
2589
2590 recv_msg_elem = recv_msg_array + i;
2591 auio = recv_msg_elem->uio;
2592
2593 /*
2594 * Do not block if we got at least one packet
2595 */
2596 if (i > 0) {
2597 flags |= MSG_DONTWAIT;
2598 }
2599
2600 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2601 &recv_msg_elem->psa : NULL;
2602 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2603 &recv_msg_elem->controlp : NULL;
2604
2605 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2606 auio, NULL, controlp, &flags);
2607 if (error) {
2608 break;
2609 }
2610 /*
2611 * We have some data
2612 */
2613 recv_msg_elem->which |= SOCK_MSG_DATA;
2614 /*
2615 * Set the messages flags for this packet
2616 */
2617 flags &= ~MSG_DONTWAIT;
2618 recv_msg_elem->flags = flags;
2619 /*
2620 * Stop on partial copy
2621 */
2622 if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2623 break;
2624 }
2625 }
2626
2627 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2628
2629 if (error) {
2630 if (len_after != len_before && (error == ERESTART ||
2631 error == EINTR || error == EWOULDBLOCK)) {
2632 error = 0;
2633 } else {
2634 goto out;
2635 }
2636 }
2637
2638 uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2639 uap->cnt, user_msg_x, recv_msg_array, &error);
2640 if (error != 0) {
2641 goto out;
2642 }
2643
2644 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2645 if (error) {
2646 DBG_PRINTF("%s copyout() failed", __func__);
2647 goto out;
2648 }
2649 *retval = (int)(uiocnt);
2650
2651 out:
2652 kfree_data(umsgp, uap->cnt * size_of_msghdr);
2653 free_recv_msg_array(recv_msg_array, uap->cnt);
2654 kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);
2655
2656 return error;
2657 }
2658
2659 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2660 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2661 {
2662 int error = EOPNOTSUPP;
2663 socket_ref_t so;
2664 size_t size_of_msghdrx;
2665 caddr_t msghdrxp;
2666 struct user32_msghdr_x msghdrx32 = {};
2667 struct user64_msghdr_x msghdrx64 = {};
2668 int spacetype;
2669 u_int i;
2670 uio_t auio = NULL;
2671 caddr_t src;
2672 int flags;
2673 mbuf_ref_t pkt_list = NULL, m;
2674 mbuf_ref_t addr_list = NULL, m_addr;
2675 mbuf_ref_t ctl_list = NULL, control;
2676 u_int pktcnt;
2677
2678 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2679
2680 error = file_socket(uap->s, &so);
2681 if (error) {
2682 goto done_no_filedrop;
2683 }
2684 if (so == NULL) {
2685 error = EBADF;
2686 goto done;
2687 }
2688
2689 #if CONFIG_MACF_SOCKET_SUBSET
2690 /*
2691 * We check the state without holding the socket lock;
2692 * if a race condition occurs, it would simply result
2693 * in an extra call to the MAC check function.
2694 */
2695 if (!(so->so_state & SS_DEFUNCT) &&
2696 !(so->so_state & SS_ISCONNECTED) &&
2697 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2698 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2699 goto done;
2700 }
2701 #endif /* MAC_SOCKET_SUBSET */
2702
2703 /*
2704 * With soreceive_m_list, all packets must be uniform, with address and
2705 * control as they are returned in parallel lists and it's only guaranteed
2706 * when pru_send_list is supported
2707 */
2708 if (do_recvmsg_x_donttrunc != 0 || (so->so_options & SO_DONTTRUNC)) {
2709 error = recvmsg_x_array(p, so, uap, retval);
2710 goto done;
2711 }
2712
2713 /*
2714 * Input parameter range check
2715 */
2716 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2717 error = EINVAL;
2718 goto done;
2719 }
2720 if (uap->cnt > somaxrecvmsgx) {
2721 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2722 }
2723
2724 if (IS_64BIT_PROCESS(p)) {
2725 msghdrxp = (caddr_t)&msghdrx64;
2726 size_of_msghdrx = sizeof(struct user64_msghdr_x);
2727 spacetype = UIO_USERSPACE64;
2728 } else {
2729 msghdrxp = (caddr_t)&msghdrx32;
2730 size_of_msghdrx = sizeof(struct user32_msghdr_x);
2731 spacetype = UIO_USERSPACE32;
2732 }
2733 src = __unsafe_forge_bidi_indexable(caddr_t, uap->msgp, uap->cnt);
2734
2735 flags = uap->flags;
2736
2737 /*
2738 * Only allow MSG_DONTWAIT
2739 */
2740 if ((flags & ~(MSG_DONTWAIT | MSG_NBIO)) != 0) {
2741 error = EINVAL;
2742 goto done;
2743 }
2744
2745 /*
2746 * Receive list of packet in a single call
2747 */
2748 pktcnt = uap->cnt;
2749 error = soreceive_m_list(so, &pktcnt, &addr_list, &pkt_list, &ctl_list,
2750 &flags);
2751 if (error != 0) {
2752 if (pktcnt != 0 && (error == ERESTART ||
2753 error == EINTR || error == EWOULDBLOCK)) {
2754 error = 0;
2755 } else {
2756 goto done;
2757 }
2758 }
2759
2760 m_addr = addr_list;
2761 m = pkt_list;
2762 control = ctl_list;
2763
2764 for (i = 0; i < pktcnt; i++) {
2765 struct user_msghdr user_msg;
2766 ssize_t len;
2767 struct user_iovec *iovp;
2768 struct mbuf *n;
2769
2770 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
2771 panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
2772 }
2773
2774 error = copyin((user_addr_t)(src + i * size_of_msghdrx),
2775 msghdrxp, size_of_msghdrx);
2776 if (error) {
2777 DBG_PRINTF("%s copyin() msghdrx failed %d\n",
2778 __func__, error);
2779 goto done;
2780 }
2781 if (spacetype == UIO_USERSPACE64) {
2782 user_msg.msg_name = msghdrx64.msg_name;
2783 user_msg.msg_namelen = msghdrx64.msg_namelen;
2784 user_msg.msg_iov = msghdrx64.msg_iov;
2785 user_msg.msg_iovlen = msghdrx64.msg_iovlen;
2786 user_msg.msg_control = msghdrx64.msg_control;
2787 user_msg.msg_controllen = msghdrx64.msg_controllen;
2788 } else {
2789 user_msg.msg_name = msghdrx32.msg_name;
2790 user_msg.msg_namelen = msghdrx32.msg_namelen;
2791 user_msg.msg_iov = msghdrx32.msg_iov;
2792 user_msg.msg_iovlen = msghdrx32.msg_iovlen;
2793 user_msg.msg_control = msghdrx32.msg_control;
2794 user_msg.msg_controllen = msghdrx32.msg_controllen;
2795 }
2796 user_msg.msg_flags = 0;
2797 if (user_msg.msg_iovlen <= 0 ||
2798 user_msg.msg_iovlen > UIO_MAXIOV) {
2799 error = EMSGSIZE;
2800 DBG_PRINTF("%s bad msg_iovlen, error %d\n",
2801 __func__, error);
2802 goto done;
2803 }
2804 /*
2805 * Attempt to reuse the uio if large enough, otherwise we need
2806 * a new one
2807 */
2808 if (auio != NULL) {
2809 if (auio->uio_max_iovs <= user_msg.msg_iovlen) {
2810 uio_reset_fast(auio, 0, spacetype, UIO_READ);
2811 } else {
2812 uio_free(auio);
2813 auio = NULL;
2814 }
2815 }
2816 if (auio == NULL) {
2817 auio = uio_create(user_msg.msg_iovlen, 0, spacetype,
2818 UIO_READ);
2819 if (auio == NULL) {
2820 error = ENOBUFS;
2821 DBG_PRINTF("%s uio_create() failed %d\n",
2822 __func__, error);
2823 goto done;
2824 }
2825 }
2826 /*
2827 * get location of iovecs within the uio then copy the iovecs
2828 * from user space.
2829 */
2830 iovp = uio_iovsaddr_user(auio);
2831 if (iovp == NULL) {
2832 error = ENOMEM;
2833 DBG_PRINTF("%s uio_iovsaddr() failed %d\n",
2834 __func__, error);
2835 goto done;
2836 }
2837 error = copyin_user_iovec_array(user_msg.msg_iov,
2838 spacetype, user_msg.msg_iovlen, iovp);
2839 if (error != 0) {
2840 DBG_PRINTF("%s copyin_user_iovec_array() failed %d\n",
2841 __func__, error);
2842 goto done;
2843 }
2844 error = uio_calculateresid_user(auio);
2845 if (error != 0) {
2846 DBG_PRINTF("%s uio_calculateresid() failed %d\n",
2847 __func__, error);
2848 goto done;
2849 }
2850 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2851
2852 len = uio_resid(auio);
2853 for (n = m; n != NULL; n = n->m_next) {
2854 user_ssize_t resid = uio_resid(auio);
2855 if (resid < n->m_len) {
2856 error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2857 if (error != 0) {
2858 DBG_PRINTF("%s uiomove() failed\n",
2859 __func__);
2860 goto done;
2861 }
2862 flags |= MSG_TRUNC;
2863 break;
2864 }
2865
2866 error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2867 if (error != 0) {
2868 DBG_PRINTF("%s uiomove() failed\n",
2869 __func__);
2870 goto done;
2871 }
2872 }
2873 len -= uio_resid(auio);
2874
2875 if (user_msg.msg_name != 0 && user_msg.msg_namelen != 0) {
2876 error = copyout_maddr(m_addr, user_msg.msg_name,
2877 &user_msg.msg_namelen);
2878 if (error) {
2879 DBG_PRINTF("%s copyout_maddr() failed\n",
2880 __func__);
2881 goto done;
2882 }
2883 }
2884 if (user_msg.msg_control != 0 && user_msg.msg_controllen != 0) {
2885 error = copyout_control(p, control,
2886 user_msg.msg_control, &user_msg.msg_controllen,
2887 &user_msg.msg_flags, so);
2888 if (error) {
2889 DBG_PRINTF("%s copyout_control() failed\n",
2890 __func__);
2891 goto done;
2892 }
2893 }
2894 /*
2895 * Note: the original msg_iovlen and msg_iov do not change
2896 */
2897 if (spacetype == UIO_USERSPACE64) {
2898 msghdrx64.msg_flags = user_msg.msg_flags;
2899 msghdrx64.msg_controllen = user_msg.msg_controllen;
2900 msghdrx64.msg_control = user_msg.msg_control;
2901 msghdrx64.msg_namelen = user_msg.msg_namelen;
2902 msghdrx64.msg_name = user_msg.msg_name;
2903 msghdrx64.msg_datalen = len;
2904 } else {
2905 msghdrx32.msg_flags = user_msg.msg_flags;
2906 msghdrx32.msg_controllen = user_msg.msg_controllen;
2907 msghdrx32.msg_control = (user32_addr_t) user_msg.msg_control;
2908 msghdrx32.msg_name = user_msg.msg_namelen;
2909 msghdrx32.msg_name = (user32_addr_t) user_msg.msg_name;
2910 msghdrx32.msg_datalen = (user32_size_t) len;
2911 }
2912 error = copyout(msghdrxp,
2913 (user_addr_t)(src + i * size_of_msghdrx),
2914 size_of_msghdrx);
2915 if (error) {
2916 DBG_PRINTF("%s copyout() msghdrx failed\n", __func__);
2917 goto done;
2918 }
2919
2920 m = m->m_nextpkt;
2921 if (control != NULL) {
2922 control = control->m_nextpkt;
2923 }
2924 if (m_addr != NULL) {
2925 m_addr = m_addr->m_nextpkt;
2926 }
2927 }
2928
2929 uap->flags = flags;
2930
2931 *retval = (int)i;
2932 done:
2933 file_drop(uap->s);
2934
2935 done_no_filedrop:
2936 if (pkt_list != NULL) {
2937 m_freem_list(pkt_list);
2938 }
2939 if (addr_list != NULL) {
2940 m_freem_list(addr_list);
2941 }
2942 if (ctl_list != NULL) {
2943 m_freem_list(ctl_list);
2944 }
2945 if (auio != NULL) {
2946 uio_free(auio);
2947 }
2948
2949 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2950
2951 return error;
2952 }
2953
2954 /*
2955 * Returns: 0 Success
2956 * EBADF
2957 * file_socket:ENOTSOCK
2958 * file_socket:EBADF
2959 * soshutdown:EINVAL
2960 * soshutdown:ENOTCONN
2961 * soshutdown:EADDRNOTAVAIL[TCP]
2962 * soshutdown:ENOBUFS[TCP]
2963 * soshutdown:EMSGSIZE[TCP]
2964 * soshutdown:EHOSTUNREACH[TCP]
2965 * soshutdown:ENETUNREACH[TCP]
2966 * soshutdown:ENETDOWN[TCP]
2967 * soshutdown:ENOMEM[TCP]
2968 * soshutdown:EACCES[TCP]
2969 * soshutdown:EMSGSIZE[TCP]
2970 * soshutdown:ENOBUFS[TCP]
2971 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2972 * soshutdown:??? [other protocol families]
2973 */
2974 /* ARGSUSED */
2975 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)2976 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
2977 __unused int32_ref_t retval)
2978 {
2979 socket_ref_t so;
2980 int error;
2981
2982 AUDIT_ARG(fd, uap->s);
2983 error = file_socket(uap->s, &so);
2984 if (error) {
2985 return error;
2986 }
2987 if (so == NULL) {
2988 error = EBADF;
2989 goto out;
2990 }
2991 error = soshutdown((struct socket *)so, uap->how);
2992 out:
2993 file_drop(uap->s);
2994 return error;
2995 }
2996
2997 /*
2998 * Returns: 0 Success
2999 * EFAULT
3000 * EINVAL
3001 * EACCES Mandatory Access Control failure
3002 * file_socket:ENOTSOCK
3003 * file_socket:EBADF
3004 * sosetopt:EINVAL
3005 * sosetopt:ENOPROTOOPT
3006 * sosetopt:ENOBUFS
3007 * sosetopt:EDOM
3008 * sosetopt:EFAULT
3009 * sosetopt:EOPNOTSUPP[AF_UNIX]
3010 * sosetopt:???
3011 */
3012 /* ARGSUSED */
3013 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)3014 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
3015 __unused int32_ref_t retval)
3016 {
3017 socket_ref_t so;
3018 struct sockopt sopt;
3019 int error;
3020
3021 AUDIT_ARG(fd, uap->s);
3022 if (uap->val == 0 && uap->valsize != 0) {
3023 return EFAULT;
3024 }
3025 /* No bounds checking on size (it's unsigned) */
3026
3027 error = file_socket(uap->s, &so);
3028 if (error) {
3029 return error;
3030 }
3031
3032 sopt.sopt_dir = SOPT_SET;
3033 sopt.sopt_level = uap->level;
3034 sopt.sopt_name = uap->name;
3035 sopt.sopt_val = uap->val;
3036 sopt.sopt_valsize = uap->valsize;
3037 sopt.sopt_p = p;
3038
3039 if (so == NULL) {
3040 error = EINVAL;
3041 goto out;
3042 }
3043 #if CONFIG_MACF_SOCKET_SUBSET
3044 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
3045 &sopt)) != 0) {
3046 goto out;
3047 }
3048 #endif /* MAC_SOCKET_SUBSET */
3049 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
3050 out:
3051 file_drop(uap->s);
3052 return error;
3053 }
3054
3055 /*
3056 * Returns: 0 Success
3057 * EINVAL
3058 * EBADF
3059 * EACCES Mandatory Access Control failure
3060 * copyin:EFAULT
3061 * copyout:EFAULT
3062 * file_socket:ENOTSOCK
3063 * file_socket:EBADF
3064 * sogetopt:???
3065 */
3066 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)3067 getsockopt(proc_ref_t p, struct getsockopt_args *uap,
3068 __unused int32_ref_t retval)
3069 {
3070 int error;
3071 socklen_t valsize;
3072 struct sockopt sopt;
3073 socket_ref_t so;
3074
3075 error = file_socket(uap->s, &so);
3076 if (error) {
3077 return error;
3078 }
3079 if (uap->val) {
3080 error = copyin(uap->avalsize, (caddr_t)&valsize,
3081 sizeof(valsize));
3082 if (error) {
3083 goto out;
3084 }
3085 /* No bounds checking on size (it's unsigned) */
3086 } else {
3087 valsize = 0;
3088 }
3089 sopt.sopt_dir = SOPT_GET;
3090 sopt.sopt_level = uap->level;
3091 sopt.sopt_name = uap->name;
3092 sopt.sopt_val = uap->val;
3093 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
3094 sopt.sopt_p = p;
3095
3096 if (so == NULL) {
3097 error = EBADF;
3098 goto out;
3099 }
3100 #if CONFIG_MACF_SOCKET_SUBSET
3101 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
3102 &sopt)) != 0) {
3103 goto out;
3104 }
3105 #endif /* MAC_SOCKET_SUBSET */
3106 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
3107 if (error == 0) {
3108 valsize = (socklen_t)sopt.sopt_valsize;
3109 error = copyout((caddr_t)&valsize, uap->avalsize,
3110 sizeof(valsize));
3111 }
3112 out:
3113 file_drop(uap->s);
3114 return error;
3115 }
3116
3117
3118 /*
3119 * Get socket name.
3120 *
3121 * Returns: 0 Success
3122 * EBADF
3123 * file_socket:ENOTSOCK
3124 * file_socket:EBADF
3125 * copyin:EFAULT
3126 * copyout:EFAULT
3127 * <pru_sockaddr>:ENOBUFS[TCP]
3128 * <pru_sockaddr>:ECONNRESET[TCP]
3129 * <pru_sockaddr>:EINVAL[AF_UNIX]
3130 * <sf_getsockname>:???
3131 */
3132 /* ARGSUSED */
3133 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)3134 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
3135 __unused int32_ref_t retval)
3136 {
3137 socket_ref_t so;
3138 sockaddr_ref_t sa;
3139 socklen_t len;
3140 socklen_t sa_len;
3141 int error;
3142
3143 error = file_socket(uap->fdes, &so);
3144 if (error) {
3145 return error;
3146 }
3147 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3148 if (error) {
3149 goto out;
3150 }
3151 if (so == NULL) {
3152 error = EBADF;
3153 goto out;
3154 }
3155 sa = 0;
3156 socket_lock(so, 1);
3157 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
3158 if (error == 0) {
3159 error = sflt_getsockname(so, &sa);
3160 if (error == EJUSTRETURN) {
3161 error = 0;
3162 }
3163 }
3164 socket_unlock(so, 1);
3165 if (error) {
3166 goto bad;
3167 }
3168 if (sa == 0) {
3169 len = 0;
3170 goto gotnothing;
3171 }
3172
3173 sa_len = sa->sa_len;
3174 len = MIN(len, sa_len);
3175 error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->asa, len);
3176 if (error) {
3177 goto bad;
3178 }
3179 /* return the actual, untruncated address length */
3180 len = sa_len;
3181 gotnothing:
3182 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3183 bad:
3184 free_sockaddr(sa);
3185 out:
3186 file_drop(uap->fdes);
3187 return error;
3188 }
3189
3190 /*
3191 * Get name of peer for connected socket.
3192 *
3193 * Returns: 0 Success
3194 * EBADF
3195 * EINVAL
3196 * ENOTCONN
3197 * file_socket:ENOTSOCK
3198 * file_socket:EBADF
3199 * copyin:EFAULT
3200 * copyout:EFAULT
3201 * <pru_peeraddr>:???
3202 * <sf_getpeername>:???
3203 */
3204 /* ARGSUSED */
3205 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)3206 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
3207 __unused int32_ref_t retval)
3208 {
3209 socket_ref_t so;
3210 sockaddr_ref_t sa;
3211 socklen_t len;
3212 socklen_t sa_len;
3213 int error;
3214
3215 error = file_socket(uap->fdes, &so);
3216 if (error) {
3217 return error;
3218 }
3219 if (so == NULL) {
3220 error = EBADF;
3221 goto out;
3222 }
3223
3224 socket_lock(so, 1);
3225
3226 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
3227 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
3228 /* the socket has been shutdown, no more getpeername's */
3229 socket_unlock(so, 1);
3230 error = EINVAL;
3231 goto out;
3232 }
3233
3234 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
3235 socket_unlock(so, 1);
3236 error = ENOTCONN;
3237 goto out;
3238 }
3239 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3240 if (error) {
3241 socket_unlock(so, 1);
3242 goto out;
3243 }
3244 sa = 0;
3245 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
3246 if (error == 0) {
3247 error = sflt_getpeername(so, &sa);
3248 if (error == EJUSTRETURN) {
3249 error = 0;
3250 }
3251 }
3252 socket_unlock(so, 1);
3253 if (error) {
3254 goto bad;
3255 }
3256 if (sa == 0) {
3257 len = 0;
3258 goto gotnothing;
3259 }
3260 sa_len = sa->sa_len;
3261 len = MIN(len, sa_len);
3262 error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->asa, len);
3263 if (error) {
3264 goto bad;
3265 }
3266 /* return the actual, untruncated address length */
3267 len = sa_len;
3268 gotnothing:
3269 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3270 bad:
3271 free_sockaddr(sa);
3272 out:
3273 file_drop(uap->fdes);
3274 return error;
3275 }
3276
3277 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)3278 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
3279 {
3280 sockaddr_ref_t sa;
3281 struct mbuf *m;
3282 int error;
3283 socklen_t alloc_buflen = buflen;
3284
3285 if (buflen > INT_MAX / 2) {
3286 return EINVAL;
3287 }
3288 if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
3289 buflen < offsetof(struct sockaddr, sa_data[0]))) {
3290 return EINVAL;
3291 }
3292 if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
3293 return EINVAL;
3294 }
3295
3296 #ifdef __LP64__
3297 /*
3298 * The fd's in the buffer must expand to be pointers, thus we need twice
3299 * as much space
3300 */
3301 if (type == MT_CONTROL) {
3302 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3303 sizeof(struct cmsghdr);
3304 }
3305 #endif
3306 if (alloc_buflen > MLEN) {
3307 if (type == MT_SONAME && alloc_buflen <= 112) {
3308 alloc_buflen = MLEN; /* unix domain compat. hack */
3309 } else if (alloc_buflen > MCLBYTES) {
3310 return EINVAL;
3311 }
3312 }
3313 m = m_get(M_WAIT, type);
3314 if (m == NULL) {
3315 return ENOBUFS;
3316 }
3317 if (alloc_buflen > MLEN) {
3318 MCLGET(m, M_WAIT);
3319 if ((m->m_flags & M_EXT) == 0) {
3320 m_free(m);
3321 return ENOBUFS;
3322 }
3323 }
3324 /*
3325 * K64: We still copyin the original buflen because it gets expanded
3326 * later and we lie about the size of the mbuf because it only affects
3327 * unp_* functions
3328 */
3329 m->m_len = buflen;
3330 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
3331 if (error) {
3332 (void) m_free(m);
3333 } else {
3334 *mp = m;
3335 if (type == MT_SONAME) {
3336 VERIFY(buflen <= SOCK_MAXADDRLEN);
3337 sa = mtod(m, sockaddr_ref_t);
3338 sa->sa_len = (__uint8_t)buflen;
3339 }
3340 }
3341 return error;
3342 }
3343
3344 /*
3345 * Given a user_addr_t of length len, allocate and fill out a *sa.
3346 *
3347 * Returns: 0 Success
3348 * ENAMETOOLONG Filename too long
3349 * EINVAL Invalid argument
3350 * ENOMEM Not enough space
3351 * copyin:EFAULT Bad address
3352 */
3353 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3354 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
3355 size_t len, boolean_t translate_unspec)
3356 {
3357 struct sockaddr *sa;
3358 int error;
3359
3360 if (len > SOCK_MAXADDRLEN) {
3361 return ENAMETOOLONG;
3362 }
3363
3364 if (len < offsetof(struct sockaddr, sa_data[0])) {
3365 return EINVAL;
3366 }
3367
3368 sa = alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
3369
3370 error = copyin(uaddr, (caddr_t)sa, len);
3371 if (error) {
3372 free_sockaddr(sa);
3373 } else {
3374 /*
3375 * Force sa_family to AF_INET on AF_INET sockets to handle
3376 * legacy applications that use AF_UNSPEC (0). On all other
3377 * sockets we leave it unchanged and let the lower layer
3378 * handle it.
3379 */
3380 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
3381 SOCK_CHECK_DOM(so, PF_INET) &&
3382 len == sizeof(struct sockaddr_in)) {
3383 sa->sa_family = AF_INET;
3384 }
3385 VERIFY(len <= SOCK_MAXADDRLEN);
3386 sa = *&sa;
3387 sa->sa_len = (__uint8_t)len;
3388 *namp = sa;
3389 }
3390 return error;
3391 }
3392
3393 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3394 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
3395 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
3396 {
3397 int error;
3398
3399 if (ss == NULL || uaddr == USER_ADDR_NULL ||
3400 len < offsetof(struct sockaddr, sa_data[0])) {
3401 return EINVAL;
3402 }
3403
3404 /*
3405 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
3406 * so the check here is inclusive.
3407 */
3408 if (len > sizeof(*ss)) {
3409 return ENAMETOOLONG;
3410 }
3411
3412 bzero(ss, sizeof(*ss));
3413 error = copyin(uaddr, __SA_UTILS_CONV_TO_BYTES(ss), len);
3414 if (error == 0) {
3415 /*
3416 * Force sa_family to AF_INET on AF_INET sockets to handle
3417 * legacy applications that use AF_UNSPEC (0). On all other
3418 * sockets we leave it unchanged and let the lower layer
3419 * handle it.
3420 */
3421 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
3422 SOCK_CHECK_DOM(so, PF_INET) &&
3423 len == sizeof(struct sockaddr_in)) {
3424 ss->ss_family = AF_INET;
3425 }
3426
3427 ss->ss_len = (__uint8_t)len;
3428 }
3429 return error;
3430 }
3431
3432 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3433 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3434 u_int count, user_msghdr_x_ptr_t dst,
3435 recv_msg_elem_ptr_t recv_msg_array)
3436 {
3437 int error = 0;
3438 u_int i;
3439
3440 for (i = 0; i < count; i++) {
3441 struct user_iovec *iovp;
3442 struct user_msghdr_x *user_msg = dst + i;
3443 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3444
3445 if (spacetype == UIO_USERSPACE64) {
3446 const struct user64_msghdr_x *msghdr64;
3447
3448 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3449
3450 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3451 user_msg->msg_namelen = msghdr64->msg_namelen;
3452 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3453 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3454 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3455 user_msg->msg_controllen = msghdr64->msg_controllen;
3456 user_msg->msg_flags = msghdr64->msg_flags;
3457 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3458 } else {
3459 const struct user32_msghdr_x *msghdr32;
3460
3461 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3462
3463 user_msg->msg_name = msghdr32->msg_name;
3464 user_msg->msg_namelen = msghdr32->msg_namelen;
3465 user_msg->msg_iov = msghdr32->msg_iov;
3466 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3467 user_msg->msg_control = msghdr32->msg_control;
3468 user_msg->msg_controllen = msghdr32->msg_controllen;
3469 user_msg->msg_flags = msghdr32->msg_flags;
3470 user_msg->msg_datalen = msghdr32->msg_datalen;
3471 }
3472
3473 if (user_msg->msg_iovlen <= 0 ||
3474 user_msg->msg_iovlen > UIO_MAXIOV) {
3475 error = EMSGSIZE;
3476 goto done;
3477 }
3478 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3479 spacetype, direction);
3480 if (recv_msg_elem->uio == NULL) {
3481 error = ENOMEM;
3482 goto done;
3483 }
3484
3485 iovp = uio_iovsaddr_user(recv_msg_elem->uio);
3486 if (iovp == NULL) {
3487 error = ENOMEM;
3488 goto done;
3489 }
3490 error = copyin_user_iovec_array(user_msg->msg_iov,
3491 spacetype, user_msg->msg_iovlen, iovp);
3492 if (error) {
3493 goto done;
3494 }
3495 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3496
3497 error = uio_calculateresid_user(recv_msg_elem->uio);
3498 if (error) {
3499 goto done;
3500 }
3501 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3502
3503 if (user_msg->msg_name && user_msg->msg_namelen) {
3504 recv_msg_elem->which |= SOCK_MSG_SA;
3505 }
3506 if (user_msg->msg_control && user_msg->msg_controllen) {
3507 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3508 }
3509 }
3510 done:
3511
3512 return error;
3513 }
3514
3515 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3516 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3517 u_int count, user_msghdr_x_ptr_t src,
3518 recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3519 {
3520 u_int i;
3521 u_int retcnt = 0;
3522 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3523
3524 *ret_error = 0;
3525
3526 for (i = 0; i < count; i++) {
3527 struct user_msghdr_x *user_msg = src + i;
3528 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3529 user_ssize_t len = 0;
3530 int error;
3531
3532 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3533
3534 if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3535 retcnt++;
3536
3537 if (recv_msg_elem->which & SOCK_MSG_SA) {
3538 error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3539 &user_msg->msg_namelen);
3540 if (error != 0) {
3541 *ret_error = error;
3542 return 0;
3543 }
3544 }
3545 if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3546 error = copyout_control(p, recv_msg_elem->controlp,
3547 user_msg->msg_control, &user_msg->msg_controllen,
3548 &recv_msg_elem->flags, so);
3549 if (error != 0) {
3550 *ret_error = error;
3551 return 0;
3552 }
3553 }
3554 }
3555
3556 if (spacetype == UIO_USERSPACE64) {
3557 struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3558
3559 msghdr64->msg_namelen = user_msg->msg_namelen;
3560 msghdr64->msg_controllen = user_msg->msg_controllen;
3561 msghdr64->msg_flags = recv_msg_elem->flags;
3562 msghdr64->msg_datalen = len;
3563 } else {
3564 struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3565
3566 msghdr32->msg_namelen = user_msg->msg_namelen;
3567 msghdr32->msg_controllen = user_msg->msg_controllen;
3568 msghdr32->msg_flags = recv_msg_elem->flags;
3569 msghdr32->msg_datalen = (user32_size_t)len;
3570 }
3571 }
3572 return retcnt;
3573 }
3574
/*
 * Allocate an array of `count' struct recv_msg_elem entries via
 * kalloc_type() with Z_WAITOK | Z_ZERO.
 *
 * The result of kalloc_type() is returned as-is.  Z_NOFAIL is not
 * requested, so callers presumably have to handle a NULL return -- confirm
 * against the allocator documentation.  Release the array with
 * free_recv_msg_array() using the same `count'.
 */
recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)
{
	return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
}
3580
3581 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3582 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3583 {
3584 if (recv_msg_array == NULL) {
3585 return;
3586 }
3587 for (uint32_t i = 0; i < count; i++) {
3588 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3589
3590 if (recv_msg_elem->uio != NULL) {
3591 uio_free(recv_msg_elem->uio);
3592 }
3593 free_sockaddr(recv_msg_elem->psa);
3594 if (recv_msg_elem->controlp != NULL) {
3595 m_freem(recv_msg_elem->controlp);
3596 }
3597 }
3598 kfree_type(struct recv_msg_elem, count, recv_msg_array);
3599 }
3600
3601
3602 /* Extern linkage requires using __counted_by instead of bptr */
3603 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3604 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3605 {
3606 user_ssize_t len = 0;
3607 u_int i;
3608
3609 for (i = 0; i < count; i++) {
3610 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3611
3612 if (recv_msg_elem->uio != NULL) {
3613 len += uio_resid(recv_msg_elem->uio);
3614 }
3615 }
3616 return len;
3617 }
3618
3619 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)3620 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3621 {
3622 user_ssize_t len = 0;
3623 u_int i;
3624
3625 for (i = 0; i < count; i++) {
3626 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3627
3628 if (recv_msg_elem->uio != NULL) {
3629 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3630
3631 /*
3632 * Sanity check on the validity of the iovec:
3633 * no point of going over sb_max
3634 */
3635 if (resid < 0 || (u_int32_t)resid > sb_max) {
3636 return 0;
3637 }
3638
3639 len += resid;
3640 if (len < 0 || (u_int32_t)len > sb_max) {
3641 return 0;
3642 }
3643 }
3644 }
3645 return 1;
3646 }
3647
3648 #if SENDFILE
3649
/*
 * Number of iovec slots sendfile() reserves on the stack for one read
 * (see UIO_STACKBUF in sendfile()); also the default chunk count there.
 */
#define SFUIOBUFS 64

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Each rounds up: ((n - 1) >> shift) + 1.  `n' must be non-zero, because
 * the subtraction is done in unsigned int and would wrap for n == 0.
 */
#define HOWMANY_16K(n)  ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n)   ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3662
3663 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)3664 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3665 mbuf_ref_ref_t m, boolean_t jumbocl)
3666 {
3667 unsigned int needed;
3668
3669 if (pktlen == 0) {
3670 panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3671 }
3672
3673 /*
3674 * Try to allocate for the whole thing. Since we want full control
3675 * over the buffer size and be able to accept partial result, we can't
3676 * use mbuf_allocpacket(). The logic below is similar to sosend().
3677 */
3678 *m = NULL;
3679 if (pktlen > MBIGCLBYTES && jumbocl) {
3680 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3681 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3682 }
3683 if (*m == NULL) {
3684 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3685 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3686 }
3687
3688 /*
3689 * Our previous attempt(s) at allocation had failed; the system
3690 * may be short on mbufs, and we want to block until they are
3691 * available. This time, ask just for 1 mbuf and don't return
3692 * until we get it.
3693 */
3694 if (*m == NULL) {
3695 needed = 1;
3696 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3697 }
3698 if (*m == NULL) {
3699 panic("%s: blocking allocation returned NULL", __func__);
3700 }
3701
3702 *maxchunks = needed;
3703 }
3704
3705 /*
3706 * sendfile(2).
3707 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3708 * struct sf_hdtr *hdtr, int flags)
3709 *
3710 * Send a file specified by 'fd' and starting at 'offset' to a socket
3711 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3712 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3713 * output. If specified, write the total number of bytes sent into *nbytes.
3714 */
3715 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)3716 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
3717 {
3718 fileproc_ref_t fp;
3719 vnode_ref_t vp;
3720 socket_ref_t so;
3721 struct writev_nocancel_args nuap;
3722 user_ssize_t writev_retval;
3723 struct user_sf_hdtr user_hdtr;
3724 struct user32_sf_hdtr user32_hdtr;
3725 struct user64_sf_hdtr user64_hdtr;
3726 off_t off, xfsize;
3727 off_t nbytes = 0, sbytes = 0;
3728 int error = 0;
3729 size_t sizeof_hdtr;
3730 off_t file_size;
3731 struct vfs_context context = *vfs_context_current();
3732 bool got_vnode_ref = false;
3733
3734 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3735
3736 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3737 0, 0, 0, 0);
3738
3739 AUDIT_ARG(fd, uap->fd);
3740 AUDIT_ARG(value32, uap->s);
3741
3742 /*
3743 * Do argument checking. Must be a regular file in, stream
3744 * type and connected socket out, positive offset.
3745 */
3746 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3747 goto done;
3748 }
3749 if ((error = vnode_getwithref(vp))) {
3750 goto done;
3751 }
3752 got_vnode_ref = true;
3753
3754 if ((fp->f_flag & FREAD) == 0) {
3755 error = EBADF;
3756 goto done1;
3757 }
3758 if (vnode_isreg(vp) == 0) {
3759 error = ENOTSUP;
3760 goto done1;
3761 }
3762 error = file_socket(uap->s, &so);
3763 if (error) {
3764 goto done1;
3765 }
3766 if (so == NULL) {
3767 error = EBADF;
3768 goto done2;
3769 }
3770 if (so->so_type != SOCK_STREAM) {
3771 error = EINVAL;
3772 goto done2;
3773 }
3774 if ((so->so_state & SS_ISCONNECTED) == 0) {
3775 error = ENOTCONN;
3776 goto done2;
3777 }
3778 if (uap->offset < 0) {
3779 error = EINVAL;
3780 goto done2;
3781 }
3782 if (uap->nbytes == USER_ADDR_NULL) {
3783 error = EINVAL;
3784 goto done2;
3785 }
3786 if (uap->flags != 0) {
3787 error = EINVAL;
3788 goto done2;
3789 }
3790
3791 context.vc_ucred = fp->fp_glob->fg_cred;
3792
3793 #if CONFIG_MACF_SOCKET_SUBSET
3794 /* JMM - fetch connected sockaddr? */
3795 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3796 if (error) {
3797 goto done2;
3798 }
3799 #endif
3800
3801 /*
3802 * Get number of bytes to send
3803 * Should it applies to size of header and trailer?
3804 */
3805 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3806 if (error) {
3807 goto done2;
3808 }
3809
3810 /*
3811 * If specified, get the pointer to the sf_hdtr struct for
3812 * any headers/trailers.
3813 */
3814 if (uap->hdtr != USER_ADDR_NULL) {
3815 caddr_t hdtrp;
3816
3817 bzero(&user_hdtr, sizeof(user_hdtr));
3818 if (is_p_64bit_process) {
3819 hdtrp = (caddr_t)&user64_hdtr;
3820 sizeof_hdtr = sizeof(user64_hdtr);
3821 } else {
3822 hdtrp = (caddr_t)&user32_hdtr;
3823 sizeof_hdtr = sizeof(user32_hdtr);
3824 }
3825 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3826 if (error) {
3827 goto done2;
3828 }
3829 if (is_p_64bit_process) {
3830 user_hdtr.headers = user64_hdtr.headers;
3831 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3832 user_hdtr.trailers = user64_hdtr.trailers;
3833 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3834 } else {
3835 user_hdtr.headers = user32_hdtr.headers;
3836 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3837 user_hdtr.trailers = user32_hdtr.trailers;
3838 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3839 }
3840
3841 /*
3842 * Send any headers. Wimp out and use writev(2).
3843 */
3844 if (user_hdtr.headers != USER_ADDR_NULL) {
3845 bzero(&nuap, sizeof(struct writev_args));
3846 nuap.fd = uap->s;
3847 nuap.iovp = user_hdtr.headers;
3848 nuap.iovcnt = user_hdtr.hdr_cnt;
3849 error = writev_nocancel(p, &nuap, &writev_retval);
3850 if (error) {
3851 goto done2;
3852 }
3853 sbytes += writev_retval;
3854 }
3855 }
3856
3857 /*
3858 * Get the file size for 2 reasons:
3859 * 1. We don't want to allocate more mbufs than necessary
3860 * 2. We don't want to read past the end of file
3861 */
3862 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3863 goto done2;
3864 }
3865
3866 /*
3867 * Simply read file data into a chain of mbufs that used with scatter
3868 * gather reads. We're not (yet?) setup to use zero copy external
3869 * mbufs that point to the file pages.
3870 */
3871 socket_lock(so, 1);
3872 error = sblock(&so->so_snd, SBL_WAIT);
3873 if (error) {
3874 socket_unlock(so, 1);
3875 goto done2;
3876 }
3877 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3878 mbuf_ref_t m0 = NULL;
3879 mbuf_t m;
3880 unsigned int nbufs = SFUIOBUFS, i;
3881 uio_t auio;
3882 UIO_STACKBUF(uio_buf, SFUIOBUFS); /* 1KB !!! */
3883 size_t uiolen;
3884 user_ssize_t rlen;
3885 off_t pgoff;
3886 size_t pktlen;
3887 boolean_t jumbocl;
3888
3889 /*
3890 * Calculate the amount to transfer.
3891 * Align to round number of pages.
3892 * Not to exceed send socket buffer,
3893 * the EOF, or the passed in nbytes.
3894 */
3895 xfsize = sbspace(&so->so_snd);
3896
3897 if (xfsize <= 0) {
3898 if (so->so_state & SS_CANTSENDMORE) {
3899 error = EPIPE;
3900 goto done3;
3901 } else if ((so->so_state & SS_NBIO)) {
3902 error = EAGAIN;
3903 goto done3;
3904 } else {
3905 xfsize = PAGE_SIZE;
3906 }
3907 }
3908
3909 if (xfsize > SENDFILE_MAX_BYTES) {
3910 xfsize = SENDFILE_MAX_BYTES;
3911 } else if (xfsize > PAGE_SIZE) {
3912 xfsize = trunc_page(xfsize);
3913 }
3914 pgoff = off & PAGE_MASK_64;
3915 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3916 xfsize = PAGE_SIZE_64 - pgoff;
3917 }
3918 if (nbytes && xfsize > (nbytes - sbytes)) {
3919 xfsize = nbytes - sbytes;
3920 }
3921 if (xfsize <= 0) {
3922 break;
3923 }
3924 if (off + xfsize > file_size) {
3925 xfsize = file_size - off;
3926 }
3927 if (xfsize <= 0) {
3928 break;
3929 }
3930
3931 /*
3932 * Attempt to use larger than system page-size clusters for
3933 * large writes only if there is a jumbo cluster pool and
3934 * if the socket is marked accordingly.
3935 */
3936 jumbocl = sosendjcl && njcl > 0 &&
3937 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3938
3939 socket_unlock(so, 0);
3940 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3941 pktlen = mbuf_pkthdr_maxlen(m0);
3942 if (pktlen < (size_t)xfsize) {
3943 xfsize = pktlen;
3944 }
3945
3946 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3947 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3948 if (auio == NULL) {
3949 DBG_PRINTF("sendfile failed. nbufs = %d. %s", nbufs,
3950 "File a radar related to rdar://10146739.\n");
3951 mbuf_freem(m0);
3952 error = ENXIO;
3953 socket_lock(so, 0);
3954 goto done3;
3955 }
3956
3957 for (i = 0, m = m0, uiolen = 0;
3958 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3959 i++, m = mbuf_next(m)) {
3960 size_t mlen = mbuf_maxlen(m);
3961
3962 if (mlen + uiolen > (size_t)xfsize) {
3963 mlen = xfsize - uiolen;
3964 }
3965 mbuf_setlen(m, mlen);
3966 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3967 mlen);
3968 uiolen += mlen;
3969 }
3970
3971 if (xfsize != uio_resid(auio)) {
3972 DBG_PRINTF("sendfile: xfsize: %lld != uio_resid(auio): "
3973 "%lld\n", xfsize, (long long)uio_resid(auio));
3974 }
3975
3976 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3977 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3978 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3979 error = fo_read(fp, auio, FOF_OFFSET, &context);
3980 socket_lock(so, 0);
3981 if (error != 0) {
3982 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3983 error == EINTR || error == EWOULDBLOCK)) {
3984 error = 0;
3985 } else {
3986 mbuf_freem(m0);
3987 goto done3;
3988 }
3989 }
3990 xfsize -= uio_resid(auio);
3991 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3992 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3993 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3994
3995 if (xfsize == 0) {
3996 break;
3997 }
3998 if (xfsize + off > file_size) {
3999 DBG_PRINTF("sendfile: xfsize: %lld + off: %lld > file_size:"
4000 "%lld\n", xfsize, off, file_size);
4001 }
4002 for (i = 0, m = m0, rlen = 0;
4003 i < nbufs && m != NULL && rlen < xfsize;
4004 i++, m = mbuf_next(m)) {
4005 size_t mlen = mbuf_maxlen(m);
4006
4007 if (rlen + mlen > (size_t)xfsize) {
4008 mlen = xfsize - rlen;
4009 }
4010 mbuf_setlen(m, mlen);
4011
4012 rlen += mlen;
4013 }
4014 mbuf_pkthdr_setlen(m0, xfsize);
4015
4016 retry_space:
4017 /*
4018 * Make sure that the socket is still able to take more data.
4019 * CANTSENDMORE being true usually means that the connection
4020 * was closed. so_error is true when an error was sensed after
4021 * a previous send.
4022 * The state is checked after the page mapping and buffer
4023 * allocation above since those operations may block and make
4024 * any socket checks stale. From this point forward, nothing
4025 * blocks before the pru_send (or more accurately, any blocking
4026 * results in a loop back to here to re-check).
4027 */
4028 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
4029 if (so->so_state & SS_CANTSENDMORE) {
4030 error = EPIPE;
4031 } else {
4032 error = so->so_error;
4033 so->so_error = 0;
4034 }
4035 m_freem(m0);
4036 goto done3;
4037 }
4038 /*
4039 * Wait for socket space to become available. We do this just
4040 * after checking the connection state above in order to avoid
4041 * a race condition with sbwait().
4042 */
4043 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
4044 if (so->so_state & SS_NBIO) {
4045 m_freem(m0);
4046 error = EAGAIN;
4047 goto done3;
4048 }
4049 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4050 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
4051 error = sbwait(&so->so_snd);
4052 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4053 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
4054 /*
4055 * An error from sbwait usually indicates that we've
4056 * been interrupted by a signal. If we've sent anything
4057 * then return bytes sent, otherwise return the error.
4058 */
4059 if (error) {
4060 m_freem(m0);
4061 goto done3;
4062 }
4063 goto retry_space;
4064 }
4065
4066 mbuf_ref_t control = NULL;
4067 {
4068 /*
4069 * Socket filter processing
4070 */
4071
4072 error = sflt_data_out(so, NULL, &m0, &control, 0);
4073 if (error) {
4074 if (error == EJUSTRETURN) {
4075 error = 0;
4076 continue;
4077 }
4078 goto done3;
4079 }
4080 /*
4081 * End Socket filter processing
4082 */
4083 }
4084 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4085 uap->s, 0, 0, 0, 0);
4086 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
4087 NULL, control, p);
4088 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4089 uap->s, 0, 0, 0, 0);
4090 if (error) {
4091 goto done3;
4092 }
4093 }
4094 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
4095 /*
4096 * Send trailers. Wimp out and use writev(2).
4097 */
4098 if (uap->hdtr != USER_ADDR_NULL &&
4099 user_hdtr.trailers != USER_ADDR_NULL) {
4100 bzero(&nuap, sizeof(struct writev_args));
4101 nuap.fd = uap->s;
4102 nuap.iovp = user_hdtr.trailers;
4103 nuap.iovcnt = user_hdtr.trl_cnt;
4104 error = writev_nocancel(p, &nuap, &writev_retval);
4105 if (error) {
4106 goto done2;
4107 }
4108 sbytes += writev_retval;
4109 }
4110 done2:
4111 file_drop(uap->s);
4112 done1:
4113 file_drop(uap->fd);
4114 done:
4115 if (got_vnode_ref) {
4116 vnode_put(vp);
4117 }
4118 if (uap->nbytes != USER_ADDR_NULL) {
4119 /* XXX this appears bogus for some early failure conditions */
4120 copyout(&sbytes, uap->nbytes, sizeof(off_t));
4121 }
4122 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
4123 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
4124 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
4125 return error;
4126 done3:
4127 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
4128 goto done2;
4129 }
4130
4131
4132 #endif /* SENDFILE */
4133