1 /*
2 * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/cdefs.h>
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/proc_internal.h>
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/malloc.h>
80 #include <sys/mcache.h>
81 #include <sys/mbuf.h>
82 #include <kern/locks.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/signalvar.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/kernel.h>
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/task.h>
92 #include <sys/priv.h>
93 #include <sys/sysctl.h>
94 #include <sys/sys_domain.h>
95 #include <sys/types.h>
96
97 #include <security/audit/audit.h>
98
99 #include <sys/kdebug.h>
100 #include <sys/sysproto.h>
101 #include <netinet/in.h>
102 #include <net/route.h>
103 #include <netinet/in_pcb.h>
104
105 #include <os/log.h>
106 #include <os/ptrtools.h>
107
108 #include <os/log.h>
109
110 #if CONFIG_MACF_SOCKET_SUBSET
111 #include <security/mac_framework.h>
112 #endif /* MAC_SOCKET_SUBSET */
113
114 #include <net/sockaddr_utils.h>
115
/*
 * Declared here and defined elsewhere.  In this file its result is passed
 * as the "%s" argument of the accept() failure logs to identify the
 * calling process.
 */
extern char *proc_name_address(void *p);

/*
 * Shorthand for the flag and ops fields reached through a fileproc's
 * fp_glob pointer, so code can write fp->f_flag / fp->f_ops.
 */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops

/*
 * Trace codes built with NETDBG_CODE() in the DBG_NETSOCK class.
 * NOTE(review): the DBG_FNC_* names appear to map one-to-one onto the
 * send/receive system calls; their use sites are not in this part of the
 * file -- confirm before renumbering.
 */
#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
137
/*
 * Forward declarations for referenced types.
 *
 * NOTE(review): each invocation appears to declare the constrained
 * pointer typedef named after its second argument (e.g. int32_ref_t,
 * void_ptr_t) that the prototypes below rely on; the macro itself is
 * defined outside this file -- confirm there.
 */
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);

/*
 * Prototypes for the file-local (static) helpers.  They are declared up
 * front so the system call entry points can call them before their
 * definitions appear.
 */

/* Common send and receive back ends. */
static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
    int, int32_ref_t );
static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
    int32_ref_t);
/* Blocking connect on an already copied-in address; see connect_nocancel(). */
static int connectit(socket_ref_t, sockaddr_ref_t);
/*
 * Copy a socket address in from user space: getsockaddr() returns it
 * through a pointer the caller later releases with free_sockaddr(),
 * getsockaddr_s() fills caller-provided sockaddr_storage.
 */
static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
    boolean_t);
#endif /* SENDFILE */
/* connectx()/disconnectx() back ends. */
static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
static int connectitx(socket_ref_t, sockaddr_ref_t,
    sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
    sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
    int_ref_t);
/* Shared implementation of socket() and socket_delegate(). */
static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);

/* Helpers for arrays of message headers (definitions are later in the file). */
static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
    u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
    u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);

static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
    socklen_ref_t, int_ref_t, socket_ref_t);
186
/* The kern.ipc sysctl node itself is defined elsewhere. */
SYSCTL_DECL(_kern_ipc);

/* Initial value of both somaxsendmsgx and somaxrecvmsgx below. */
#define SO_MAX_MSG_X_DEFAULT 256

/*
 * kern.ipc.maxsendmsgx (read-write).
 * NOTE(review): presumably the per-call message limit for sendmsg_x();
 * the code that reads it is outside this part of the file -- confirm.
 */
static u_int somaxsendmsgx = SO_MAX_MSG_X_DEFAULT;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");

/*
 * kern.ipc.maxrecvmsgx (read-write).
 * NOTE(review): presumably the receive-side counterpart for recvmsg_x()
 * -- confirm against its reader.
 */
static u_int somaxrecvmsgx = SO_MAX_MSG_X_DEFAULT;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");

/* kern.ipc.missingpktinfo (read-only counter, starts at 0). */
static u_int missingpktinfo = 0;
SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
    CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");

/* kern.ipc.do_recvmsg_x_donttrunc (read-write switch, off by default). */
static int do_recvmsg_x_donttrunc = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, do_recvmsg_x_donttrunc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &do_recvmsg_x_donttrunc, 0, "");
206
/*
 * Debug logging helpers.
 *
 * On DEBUG/DEVELOPMENT kernels, kern.ipc.debug (uipc_debug) gates
 * DBG_PRINTF() at run time and DEBUG_KERNEL_ADDRPERM() passes its
 * argument through unchanged.  On other kernels DBG_PRINTF() is a
 * no-op and DEBUG_KERNEL_ADDRPERM() maps to VM_KERNEL_ADDRPERM().
 *
 * Both DBG_PRINTF() variants expand to a single do { } while (0)
 * statement, so the macro behaves the same way in every build when used
 * as the body of an unbraced if/else.
 */
#if DEBUG || DEVELOPMENT
static int uipc_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &uipc_debug, 0, "");

#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) do { \
	if (uipc_debug != 0) { \
	        os_log(OS_LOG_DEFAULT, __VA_ARGS__); \
	} \
} while (0)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
220
221
/*
 * kern.ipc.sendmsg_x_mode (read-write) selects which sendmsg_x
 * implementation is used.
 *
 * Values for sendmsg_x_mode
 * 0: default
 * 1: sendit loop one at a time
 * 2: old implementation
 */
static u_int sendmsg_x_mode = 0;
SYSCTL_UINT(_kern_ipc, OID_AUTO, sendmsg_x_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sendmsg_x_mode, 0, "");

/*
 * System call interface to the socket abstraction.
 */

/*
 * File operations vector for sockets (defined elsewhere); installed as
 * fp->f_ops on the fileprocs created by socket_common() and
 * accept_nocancel().
 */
extern const struct fileops socketops;
237
238 /*
239 * Returns: 0 Success
240 * EACCES Mandatory Access Control failure
241 * falloc:ENFILE
242 * falloc:EMFILE
243 * falloc:ENOMEM
244 * socreate:EAFNOSUPPORT
245 * socreate:EPROTOTYPE
246 * socreate:EPROTONOSUPPORT
247 * socreate:ENOBUFS
248 * socreate:ENOMEM
249 * socreate:??? [other protocol families, IPSEC]
250 */
251 int
socket(proc_ref_t p,struct socket_args * uap,int32_ref_t retval)252 socket(proc_ref_t p,
253 struct socket_args *uap,
254 int32_ref_t retval)
255 {
256 return socket_common(p, uap->domain, uap->type, uap->protocol,
257 proc_selfpid(), retval, 0);
258 }
259
260 int
socket_delegate(proc_ref_t p,struct socket_delegate_args * uap,int32_ref_t retval)261 socket_delegate(proc_ref_t p,
262 struct socket_delegate_args *uap,
263 int32_ref_t retval)
264 {
265 return socket_common(p, uap->domain, uap->type, uap->protocol,
266 uap->epid, retval, 1);
267 }
268
269 static int
socket_common(proc_ref_t p,int domain,int type,int protocol,pid_t epid,int32_ref_t retval,int delegate)270 socket_common(proc_ref_t p,
271 int domain,
272 int type,
273 int protocol,
274 pid_t epid,
275 int32_ref_t retval,
276 int delegate)
277 {
278 socket_ref_t so;
279 fileproc_ref_t fp;
280 int fd, error;
281
282 AUDIT_ARG(socket, domain, type, protocol);
283 #if CONFIG_MACF_SOCKET_SUBSET
284 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
285 type, protocol)) != 0) {
286 return error;
287 }
288 #endif /* MAC_SOCKET_SUBSET */
289
290 if (delegate) {
291 error = priv_check_cred(kauth_cred_get(),
292 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
293 if (error) {
294 return EACCES;
295 }
296 }
297
298 error = falloc(p, &fp, &fd);
299 if (error) {
300 return error;
301 }
302 fp->f_flag = FREAD | FWRITE;
303 fp->f_ops = &socketops;
304
305 if (delegate) {
306 error = socreate_delegate(domain, &so, type, protocol, epid);
307 } else {
308 error = socreate(domain, &so, type, protocol);
309 }
310
311 if (error) {
312 fp_free(p, fd, fp);
313 } else {
314 fp_set_data(fp, so);
315
316 proc_fdlock(p);
317 procfdtbl_releasefd(p, fd, NULL);
318
319 if (ENTR_SHOULDTRACE) {
320 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
321 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
322 }
323 fp_drop(p, fd, fp, 1);
324 proc_fdunlock(p);
325
326 *retval = fd;
327 }
328 return error;
329 }
330
331 /*
332 * Returns: 0 Success
333 * EDESTADDRREQ Destination address required
334 * EBADF Bad file descriptor
335 * EACCES Mandatory Access Control failure
336 * file_socket:ENOTSOCK
337 * file_socket:EBADF
338 * getsockaddr:ENAMETOOLONG Filename too long
339 * getsockaddr:EINVAL Invalid argument
340 * getsockaddr:ENOMEM Not enough space
341 * getsockaddr:EFAULT Bad address
342 * sobindlock:???
343 */
344 /* ARGSUSED */
345 int
bind(__unused proc_t p,struct bind_args * uap,__unused int32_ref_t retval)346 bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
347 {
348 struct sockaddr_storage ss;
349 sockaddr_ref_t sa = NULL;
350 socket_ref_t so;
351 boolean_t want_free = TRUE;
352 int error;
353
354 AUDIT_ARG(fd, uap->s);
355 error = file_socket(uap->s, &so);
356 if (error != 0) {
357 return error;
358 }
359 if (so == NULL) {
360 error = EBADF;
361 goto out;
362 }
363 if (uap->name == USER_ADDR_NULL) {
364 error = EDESTADDRREQ;
365 goto out;
366 }
367 if (uap->namelen > sizeof(ss)) {
368 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
369 } else {
370 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
371 if (error == 0) {
372 sa = SA(&ss);
373 want_free = FALSE;
374 }
375 }
376 if (error != 0) {
377 goto out;
378 }
379 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
380 #if CONFIG_MACF_SOCKET_SUBSET
381 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
382 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
383 error = sobindlock(so, sa, 1); /* will lock socket */
384 }
385 #else
386 error = sobindlock(so, sa, 1); /* will lock socket */
387 #endif /* MAC_SOCKET_SUBSET */
388 if (want_free) {
389 free_sockaddr(sa);
390 }
391 out:
392 file_drop(uap->s);
393 return error;
394 }
395
396 /*
397 * Returns: 0 Success
398 * EBADF
399 * EACCES Mandatory Access Control failure
400 * file_socket:ENOTSOCK
401 * file_socket:EBADF
402 * solisten:EINVAL
403 * solisten:EOPNOTSUPP
404 * solisten:???
405 */
406 int
listen(__unused proc_ref_t p,struct listen_args * uap,__unused int32_ref_t retval)407 listen(__unused proc_ref_t p, struct listen_args *uap,
408 __unused int32_ref_t retval)
409 {
410 int error;
411 socket_ref_t so;
412
413 AUDIT_ARG(fd, uap->s);
414 error = file_socket(uap->s, &so);
415 if (error) {
416 return error;
417 }
418 if (so != NULL)
419 #if CONFIG_MACF_SOCKET_SUBSET
420 {
421 error = mac_socket_check_listen(kauth_cred_get(), so);
422 if (error == 0) {
423 error = solisten(so, uap->backlog);
424 }
425 }
426 #else
427 { error = solisten(so, uap->backlog);}
428 #endif /* MAC_SOCKET_SUBSET */
429 else {
430 error = EBADF;
431 }
432
433 file_drop(uap->s);
434 return error;
435 }
436
437 /*
438 * Returns: fp_get_ftype:EBADF Bad file descriptor
439 * fp_get_ftype:ENOTSOCK Socket operation on non-socket
440 * :EFAULT Bad address on copyin/copyout
441 * :EBADF Bad file descriptor
442 * :EOPNOTSUPP Operation not supported on socket
443 * :EINVAL Invalid argument
444 * :EWOULDBLOCK Operation would block
445 * :ECONNABORTED Connection aborted
446 * :EINTR Interrupted function
447 * :EACCES Mandatory Access Control failure
448 * falloc:ENFILE Too many files open in system
449 * falloc:EMFILE Too many open files
450 * falloc:ENOMEM Not enough space
451 * 0 Success
452 */
453 int
accept_nocancel(proc_ref_t p,struct accept_nocancel_args * uap,int32_ref_t retval)454 accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
455 int32_ref_t retval)
456 {
457 fileproc_ref_t fp;
458 sockaddr_ref_t sa = NULL;
459 socklen_t namelen;
460 int error;
461 socket_ref_t head;
462 socket_ref_t so = NULL;
463 lck_mtx_t *mutex_held;
464 int fd = uap->s;
465 int newfd;
466 unsigned int fflag;
467 int dosocklock = 0;
468
469 *retval = -1;
470
471 AUDIT_ARG(fd, uap->s);
472
473 if (uap->name) {
474 error = copyin(uap->anamelen, (caddr_t)&namelen,
475 sizeof(socklen_t));
476 if (error) {
477 return error;
478 }
479 }
480 error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
481 if (error) {
482 return error;
483 }
484 head = (struct socket *)fp_get_data(fp);
485
486 #if CONFIG_MACF_SOCKET_SUBSET
487 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
488 goto out;
489 }
490 #endif /* MAC_SOCKET_SUBSET */
491
492 socket_lock(head, 1);
493
494 if (head->so_proto->pr_getlock != NULL) {
495 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
496 dosocklock = 1;
497 } else {
498 mutex_held = head->so_proto->pr_domain->dom_mtx;
499 dosocklock = 0;
500 }
501
502 if ((head->so_options & SO_ACCEPTCONN) == 0) {
503 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
504 error = EOPNOTSUPP;
505 } else {
506 /* POSIX: The socket is not accepting connections */
507 error = EINVAL;
508 }
509 socket_unlock(head, 1);
510 os_log(OS_LOG_DEFAULT, "%s:%d accept() SO_ACCEPTCONN %d: msleep", proc_name_address(p), proc_selfpid(), error);
511 goto out;
512 }
513 check_again:
514 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
515 socket_unlock(head, 1);
516 error = EWOULDBLOCK;
517 os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: non-blocking empty queue", proc_name_address(p), proc_selfpid(), error);
518 goto out;
519 }
520 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
521 if (head->so_state & SS_CANTRCVMORE) {
522 head->so_error = ECONNABORTED;
523 break;
524 }
525 if (head->so_usecount < 1) {
526 panic("accept: head=%p refcount=%d", head,
527 head->so_usecount);
528 }
529 error = msleep((caddr_t)&head->so_timeo, mutex_held,
530 PSOCK | PCATCH, "accept", 0);
531 if (head->so_usecount < 1) {
532 panic("accept: 2 head=%p refcount=%d", head,
533 head->so_usecount);
534 }
535 if ((head->so_state & SS_DRAINING)) {
536 error = ECONNABORTED;
537 }
538 if (error) {
539 os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: msleep", proc_name_address(p), proc_selfpid(), error);
540 socket_unlock(head, 1);
541 goto out;
542 }
543 }
544 if (head->so_error) {
545 error = head->so_error;
546 head->so_error = 0;
547 socket_unlock(head, 1);
548 os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: head->so_error", proc_name_address(p), proc_selfpid(), error);
549 goto out;
550 }
551
552 /*
553 * At this point we know that there is at least one connection
554 * ready to be accepted. Remove it from the queue prior to
555 * allocating the file descriptor for it since falloc() may
556 * block allowing another process to accept the connection
557 * instead.
558 */
559 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
560
561 so_acquire_accept_list(head, NULL);
562 if (TAILQ_EMPTY(&head->so_comp)) {
563 so_release_accept_list(head);
564 goto check_again;
565 }
566
567 so = TAILQ_FIRST(&head->so_comp);
568 TAILQ_REMOVE(&head->so_comp, so, so_list);
569 /*
570 * Acquire the lock of the new connection
571 * as we may be in the process of receiving
572 * a packet that may change its so_state
573 * (e.g.: a TCP FIN).
574 */
575 if (dosocklock) {
576 socket_lock(so, 0);
577 }
578 so->so_head = NULL;
579 so->so_state &= ~SS_COMP;
580 if (dosocklock) {
581 socket_unlock(so, 0);
582 }
583 head->so_qlen--;
584 so_release_accept_list(head);
585
586 /* unlock head to avoid deadlock with select, keep a ref on head */
587 socket_unlock(head, 0);
588
589 #if CONFIG_MACF_SOCKET_SUBSET
590 /*
591 * Pass the pre-accepted socket to the MAC framework. This is
592 * cheaper than allocating a file descriptor for the socket,
593 * calling the protocol accept callback, and possibly freeing
594 * the file descriptor should the MAC check fails.
595 */
596 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
597 socket_lock(so, 1);
598 so->so_state &= ~SS_NOFDREF;
599 socket_unlock(so, 1);
600 soclose(so);
601 /* Drop reference on listening socket */
602 sodereference(head);
603 goto out;
604 }
605 #endif /* MAC_SOCKET_SUBSET */
606
607 /*
608 * Pass the pre-accepted socket to any interested socket filter(s).
609 * Upon failure, the socket would have been closed by the callee.
610 */
611 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
612 /* Drop reference on listening socket */
613 sodereference(head);
614 /* Propagate socket filter's error code to the caller */
615 os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: soacceptfilter", proc_name_address(p), proc_selfpid(), error);
616 goto out;
617 }
618
619 fflag = fp->f_flag;
620 error = falloc(p, &fp, &newfd);
621 if (error) {
622 /*
623 * Probably ran out of file descriptors.
624 *
625 * <rdar://problem/8554930>
626 * Don't put this back on the socket like we used to, that
627 * just causes the client to spin. Drop the socket.
628 */
629 socket_lock(so, 1);
630 so->so_state &= ~SS_NOFDREF;
631 socket_unlock(so, 1);
632 soclose(so);
633 sodereference(head);
634 os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: falloc", proc_name_address(p), proc_selfpid(), error);
635 goto out;
636 }
637 *retval = newfd;
638 fp->f_flag = fflag;
639 fp->f_ops = &socketops;
640 fp_set_data(fp, so);
641
642 socket_lock(head, 0);
643 if (dosocklock) {
644 socket_lock(so, 1);
645 }
646
647 /* Sync socket non-blocking/async state with file flags */
648 if (fp->f_flag & FNONBLOCK) {
649 so->so_state |= SS_NBIO;
650 } else {
651 so->so_state &= ~SS_NBIO;
652 }
653
654 if (fp->f_flag & FASYNC) {
655 so->so_state |= SS_ASYNC;
656 so->so_rcv.sb_flags |= SB_ASYNC;
657 so->so_snd.sb_flags |= SB_ASYNC;
658 } else {
659 so->so_state &= ~SS_ASYNC;
660 so->so_rcv.sb_flags &= ~SB_ASYNC;
661 so->so_snd.sb_flags &= ~SB_ASYNC;
662 }
663
664 (void) soacceptlock(so, &sa, 0);
665 socket_unlock(head, 1);
666 if (sa == NULL) {
667 namelen = 0;
668 if (uap->name) {
669 goto gotnoname;
670 }
671 error = 0;
672 goto releasefd;
673 }
674 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
675
676 if (uap->name) {
677 socklen_t sa_len;
678
679 /* save sa_len before it is destroyed */
680 sa_len = sa->sa_len;
681 namelen = MIN(namelen, sa_len);
682 error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->name, namelen);
683 if (!error) {
684 /* return the actual, untruncated address length */
685 namelen = sa_len;
686 }
687 gotnoname:
688 error = copyout((caddr_t)&namelen, uap->anamelen,
689 sizeof(socklen_t));
690 if (__improbable(error != 0)) {
691 os_log(OS_LOG_DEFAULT, "%s:%d accept() error %d: falloc", proc_name_address(p), proc_selfpid(), error);
692 }
693 }
694 free_sockaddr(sa);
695
696 releasefd:
697 /*
698 * If the socket has been marked as inactive by sosetdefunct(),
699 * disallow further operations on it.
700 */
701 if (so->so_flags & SOF_DEFUNCT) {
702 sodefunct(current_proc(), so,
703 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
704 }
705
706 if (dosocklock) {
707 socket_unlock(so, 1);
708 }
709
710 proc_fdlock(p);
711 procfdtbl_releasefd(p, newfd, NULL);
712 fp_drop(p, newfd, fp, 1);
713 proc_fdunlock(p);
714
715 out:
716 if (error == 0 && ENTR_SHOULDTRACE) {
717 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
718 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
719 }
720
721 file_drop(fd);
722 return error;
723 }
724
725 int
accept(proc_ref_t p,struct accept_args * uap,int32_ref_t retval)726 accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
727 {
728 __pthread_testcancel(1);
729 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
730 retval);
731 }
732
733 /*
734 * Returns: 0 Success
735 * EBADF Bad file descriptor
736 * EALREADY Connection already in progress
737 * EINPROGRESS Operation in progress
738 * ECONNABORTED Connection aborted
739 * EINTR Interrupted function
740 * EACCES Mandatory Access Control failure
741 * file_socket:ENOTSOCK
742 * file_socket:EBADF
743 * getsockaddr:ENAMETOOLONG Filename too long
744 * getsockaddr:EINVAL Invalid argument
745 * getsockaddr:ENOMEM Not enough space
746 * getsockaddr:EFAULT Bad address
747 * soconnectlock:EOPNOTSUPP
748 * soconnectlock:EISCONN
749 * soconnectlock:??? [depends on protocol, filters]
750 * msleep:EINTR
751 *
752 * Imputed: so_error error may be set from so_error, which
753 * may have been set by soconnectlock.
754 */
755 /* ARGSUSED */
756 int
connect(proc_ref_t p,struct connect_args * uap,int32_ref_t retval)757 connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
758 {
759 __pthread_testcancel(1);
760 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
761 retval);
762 }
763
764 int
connect_nocancel(proc_t p,struct connect_nocancel_args * uap,int32_ref_t retval)765 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
766 {
767 #pragma unused(p, retval)
768 socket_ref_t so;
769 struct sockaddr_storage ss;
770 sockaddr_ref_t sa = NULL;
771 int error;
772 int fd = uap->s;
773 boolean_t dgram;
774
775 AUDIT_ARG(fd, uap->s);
776 error = file_socket(fd, &so);
777 if (error != 0) {
778 return error;
779 }
780 if (so == NULL) {
781 error = EBADF;
782 goto out;
783 }
784
785 /*
786 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
787 * if this is a datagram socket; translate for other types.
788 */
789 dgram = (so->so_type == SOCK_DGRAM);
790
791 /* Get socket address now before we obtain socket lock */
792 if (uap->namelen > sizeof(ss)) {
793 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
794 } else {
795 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
796 if (error == 0) {
797 sa = SA(&ss);
798 }
799 }
800 if (error != 0) {
801 goto out;
802 }
803
804 error = connectit(so, sa);
805
806 if (sa != NULL && sa != SA(&ss)) {
807 free_sockaddr(sa);
808 }
809 if (error == ERESTART) {
810 error = EINTR;
811 }
812 out:
813 file_drop(fd);
814 return error;
815 }
816
817 static int
connectx_nocancel(proc_ref_t p,connectx_args_ref_t uap,int_ref_t retval)818 connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
819 {
820 #pragma unused(p, retval)
821 struct sockaddr_storage ss, sd;
822 sockaddr_ref_t src = NULL, dst = NULL;
823 socket_ref_t so;
824 int error, error1, fd = uap->socket;
825 boolean_t dgram;
826 sae_connid_t cid = SAE_CONNID_ANY;
827 struct user32_sa_endpoints ep32;
828 struct user64_sa_endpoints ep64;
829 struct user_sa_endpoints ep;
830 user_ssize_t bytes_written = 0;
831 struct user_iovec *iovp;
832 uio_t auio = NULL;
833
834 AUDIT_ARG(fd, uap->socket);
835 error = file_socket(fd, &so);
836 if (error != 0) {
837 return error;
838 }
839 if (so == NULL) {
840 error = EBADF;
841 goto out;
842 }
843
844 if (uap->endpoints == USER_ADDR_NULL) {
845 error = EINVAL;
846 goto out;
847 }
848
849 if (IS_64BIT_PROCESS(p)) {
850 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
851 if (error != 0) {
852 goto out;
853 }
854
855 ep.sae_srcif = ep64.sae_srcif;
856 ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
857 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
858 ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
859 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
860 } else {
861 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
862 if (error != 0) {
863 goto out;
864 }
865
866 ep.sae_srcif = ep32.sae_srcif;
867 ep.sae_srcaddr = ep32.sae_srcaddr;
868 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
869 ep.sae_dstaddr = ep32.sae_dstaddr;
870 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
871 }
872
873 /*
874 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
875 * if this is a datagram socket; translate for other types.
876 */
877 dgram = (so->so_type == SOCK_DGRAM);
878
879 /* Get socket address now before we obtain socket lock */
880 if (ep.sae_srcaddr != USER_ADDR_NULL) {
881 if (ep.sae_srcaddrlen > sizeof(ss)) {
882 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
883 } else {
884 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
885 if (error == 0) {
886 src = SA(&ss);
887 }
888 }
889
890 if (error) {
891 goto out;
892 }
893 }
894
895 if (ep.sae_dstaddr == USER_ADDR_NULL) {
896 error = EINVAL;
897 goto out;
898 }
899
900 /* Get socket address now before we obtain socket lock */
901 if (ep.sae_dstaddrlen > sizeof(sd)) {
902 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
903 } else {
904 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
905 if (error == 0) {
906 dst = SA(&sd);
907 }
908 }
909
910 if (error) {
911 goto out;
912 }
913
914 VERIFY(dst != NULL);
915
916 if (uap->iov != USER_ADDR_NULL) {
917 /* Verify range before calling uio_create() */
918 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
919 error = EINVAL;
920 goto out;
921 }
922
923 if (uap->len == USER_ADDR_NULL) {
924 error = EINVAL;
925 goto out;
926 }
927
928 /* allocate a uio to hold the number of iovecs passed */
929 auio = uio_create(uap->iovcnt, 0,
930 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
931 UIO_WRITE);
932
933 if (auio == NULL) {
934 error = ENOMEM;
935 goto out;
936 }
937
938 /*
939 * get location of iovecs within the uio.
940 * then copyin the iovecs from user space.
941 */
942 iovp = uio_iovsaddr_user(auio);
943 if (iovp == NULL) {
944 error = ENOMEM;
945 goto out;
946 }
947 error = copyin_user_iovec_array(uap->iov,
948 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
949 uap->iovcnt, iovp);
950 if (error != 0) {
951 goto out;
952 }
953
954 /* finish setup of uio_t */
955 error = uio_calculateresid_user(auio);
956 if (error != 0) {
957 goto out;
958 }
959 }
960
961 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
962 &cid, auio, uap->flags, &bytes_written);
963 if (error == ERESTART) {
964 error = EINTR;
965 }
966
967 if (uap->len != USER_ADDR_NULL) {
968 if (IS_64BIT_PROCESS(p)) {
969 error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
970 } else {
971 error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
972 }
973 /* give precedence to connectitx errors */
974 if ((error1 != 0) && (error == 0)) {
975 error = error1;
976 }
977 }
978
979 if (uap->connid != USER_ADDR_NULL) {
980 error1 = copyout(&cid, uap->connid, sizeof(cid));
981 /* give precedence to connectitx errors */
982 if ((error1 != 0) && (error == 0)) {
983 error = error1;
984 }
985 }
986 out:
987 file_drop(fd);
988 if (auio != NULL) {
989 uio_free(auio);
990 }
991 if (src != NULL && src != SA(&ss)) {
992 free_sockaddr(src);
993 }
994 if (dst != NULL && dst != SA(&sd)) {
995 free_sockaddr(dst);
996 }
997 return error;
998 }
999
1000 int
connectx(proc_ref_t p,struct connectx_args * uap,int * retval)1001 connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
1002 {
1003 /*
1004 * Due to similiarity with a POSIX interface, define as
1005 * an unofficial cancellation point.
1006 */
1007 __pthread_testcancel(1);
1008 return connectx_nocancel(p, uap, retval);
1009 }
1010
1011 static int
connectit(struct socket * so,sockaddr_ref_t sa)1012 connectit(struct socket *so, sockaddr_ref_t sa)
1013 {
1014 int error;
1015
1016 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
1017 #if CONFIG_MACF_SOCKET_SUBSET
1018 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
1019 return error;
1020 }
1021 #endif /* MAC_SOCKET_SUBSET */
1022
1023 socket_lock(so, 1);
1024 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1025 error = EALREADY;
1026 goto out;
1027 }
1028 error = soconnectlock(so, sa, 0);
1029 if (error != 0) {
1030 goto out;
1031 }
1032 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1033 error = EINPROGRESS;
1034 goto out;
1035 }
1036 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1037 lck_mtx_t *mutex_held;
1038
1039 if (so->so_proto->pr_getlock != NULL) {
1040 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1041 } else {
1042 mutex_held = so->so_proto->pr_domain->dom_mtx;
1043 }
1044 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1045 PSOCK | PCATCH, __func__, 0);
1046 if (so->so_state & SS_DRAINING) {
1047 error = ECONNABORTED;
1048 }
1049 if (error != 0) {
1050 break;
1051 }
1052 }
1053 if (error == 0) {
1054 error = so->so_error;
1055 so->so_error = 0;
1056 }
1057 out:
1058 socket_unlock(so, 1);
1059 return error;
1060 }
1061
1062 static int
connectitx(struct socket * so,sockaddr_ref_t src,sockaddr_ref_t dst,proc_ref_t p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uio_t auio,unsigned int flags,user_ssize_t * bytes_written)1063 connectitx(struct socket *so, sockaddr_ref_t src,
1064 sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1065 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1066 user_ssize_t *bytes_written)
1067 {
1068 int error;
1069
1070 VERIFY(dst != NULL);
1071
1072 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1073 #if CONFIG_MACF_SOCKET_SUBSET
1074 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1075 return error;
1076 }
1077
1078 if (auio != NULL) {
1079 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1080 return error;
1081 }
1082 }
1083 #endif /* MAC_SOCKET_SUBSET */
1084
1085 socket_lock(so, 1);
1086 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1087 error = EALREADY;
1088 goto out;
1089 }
1090
1091 error = soconnectxlocked(so, src, dst, p, ifscope,
1092 aid, pcid, flags, NULL, 0, auio, bytes_written);
1093 if (error != 0) {
1094 goto out;
1095 }
1096 /*
1097 * If, after the call to soconnectxlocked the flag is still set (in case
1098 * data has been queued and the connect() has actually been triggered,
1099 * it will have been unset by the transport), we exit immediately. There
1100 * is no reason to wait on any event.
1101 */
1102 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1103 error = 0;
1104 goto out;
1105 }
1106 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1107 error = EINPROGRESS;
1108 goto out;
1109 }
1110 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1111 lck_mtx_t *mutex_held;
1112
1113 if (so->so_proto->pr_getlock != NULL) {
1114 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1115 } else {
1116 mutex_held = so->so_proto->pr_domain->dom_mtx;
1117 }
1118 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1119 PSOCK | PCATCH, __func__, 0);
1120 if (so->so_state & SS_DRAINING) {
1121 error = ECONNABORTED;
1122 }
1123 if (error != 0) {
1124 break;
1125 }
1126 }
1127 if (error == 0) {
1128 error = so->so_error;
1129 so->so_error = 0;
1130 }
1131 out:
1132 socket_unlock(so, 1);
1133 return error;
1134 }
1135
/*
 * peeloff performs no work on its arguments: all three parameters are
 * marked unused.  It only runs the pthread cancellation test and then
 * returns 0.
 */
int
peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);
	return 0;
}
1147
/*
 * Cancellable front end for disconnectx_nocancel().
 *
 * Runs the pthread cancellation test and then passes p, uap and retval
 * through unchanged, returning whatever disconnectx_nocancel() returns.
 */
int
disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
{
	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);
	return disconnectx_nocancel(p, uap, retval);
}
1158
1159 static int
disconnectx_nocancel(proc_ref_t p,struct disconnectx_args * uap,int * retval)1160 disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1161 {
1162 #pragma unused(p, retval)
1163 socket_ref_t so;
1164 int fd = uap->s;
1165 int error;
1166
1167 error = file_socket(fd, &so);
1168 if (error != 0) {
1169 return error;
1170 }
1171 if (so == NULL) {
1172 error = EBADF;
1173 goto out;
1174 }
1175
1176 error = sodisconnectx(so, uap->aid, uap->cid);
1177 out:
1178 file_drop(fd);
1179 return error;
1180 }
1181
1182 /*
1183 * Returns: 0 Success
1184 * socreate:EAFNOSUPPORT
1185 * socreate:EPROTOTYPE
1186 * socreate:EPROTONOSUPPORT
1187 * socreate:ENOBUFS
1188 * socreate:ENOMEM
1189 * socreate:EISCONN
1190 * socreate:??? [other protocol families, IPSEC]
1191 * falloc:ENFILE
1192 * falloc:EMFILE
1193 * falloc:ENOMEM
1194 * copyout:EFAULT
1195 * soconnect2:EINVAL
1196 * soconnect2:EPROTOTYPE
1197 * soconnect2:??? [other protocol families[
1198 */
1199 int
socketpair(proc_ref_t p,struct socketpair_args * uap,__unused int32_ref_t retval)1200 socketpair(proc_ref_t p, struct socketpair_args *uap,
1201 __unused int32_ref_t retval)
1202 {
1203 fileproc_ref_t fp1, fp2;
1204 socket_ref_t so1, so2;
1205 int fd, error, sv[2];
1206
1207 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1208 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1209 if (error) {
1210 return error;
1211 }
1212 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1213 if (error) {
1214 goto free1;
1215 }
1216
1217 error = falloc(p, &fp1, &fd);
1218 if (error) {
1219 goto free2;
1220 }
1221 fp1->f_flag = FREAD | FWRITE;
1222 fp1->f_ops = &socketops;
1223 fp_set_data(fp1, so1);
1224 sv[0] = fd;
1225
1226 error = falloc(p, &fp2, &fd);
1227 if (error) {
1228 goto free3;
1229 }
1230 fp2->f_flag = FREAD | FWRITE;
1231 fp2->f_ops = &socketops;
1232 fp_set_data(fp2, so2);
1233 sv[1] = fd;
1234
1235 error = soconnect2(so1, so2);
1236 if (error) {
1237 goto free4;
1238 }
1239 if (uap->type == SOCK_DGRAM) {
1240 /*
1241 * Datagram socket connection is asymmetric.
1242 */
1243 error = soconnect2(so2, so1);
1244 if (error) {
1245 goto free4;
1246 }
1247 }
1248
1249 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1250 goto free4;
1251 }
1252
1253 proc_fdlock(p);
1254 procfdtbl_releasefd(p, sv[0], NULL);
1255 procfdtbl_releasefd(p, sv[1], NULL);
1256 fp_drop(p, sv[0], fp1, 1);
1257 fp_drop(p, sv[1], fp2, 1);
1258 proc_fdunlock(p);
1259
1260 return 0;
1261 free4:
1262 fp_free(p, sv[1], fp2);
1263 free3:
1264 fp_free(p, sv[0], fp1);
1265 free2:
1266 (void) soclose(so2);
1267 free1:
1268 (void) soclose(so1);
1269 return error;
1270 }
1271
1272 /*
1273 * Returns: 0 Success
1274 * EINVAL
1275 * ENOBUFS
1276 * EBADF
1277 * EPIPE
1278 * EACCES Mandatory Access Control failure
1279 * file_socket:ENOTSOCK
1280 * file_socket:EBADF
1281 * getsockaddr:ENAMETOOLONG Filename too long
1282 * getsockaddr:EINVAL Invalid argument
1283 * getsockaddr:ENOMEM Not enough space
1284 * getsockaddr:EFAULT Bad address
1285 * <pru_sosend>:EACCES[TCP]
1286 * <pru_sosend>:EADDRINUSE[TCP]
1287 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1288 * <pru_sosend>:EAFNOSUPPORT[TCP]
1289 * <pru_sosend>:EAGAIN[TCP]
1290 * <pru_sosend>:EBADF
1291 * <pru_sosend>:ECONNRESET[TCP]
1292 * <pru_sosend>:EFAULT
1293 * <pru_sosend>:EHOSTUNREACH[TCP]
1294 * <pru_sosend>:EINTR
1295 * <pru_sosend>:EINVAL
1296 * <pru_sosend>:EISCONN[AF_INET]
1297 * <pru_sosend>:EMSGSIZE[TCP]
1298 * <pru_sosend>:ENETDOWN[TCP]
1299 * <pru_sosend>:ENETUNREACH[TCP]
1300 * <pru_sosend>:ENOBUFS
1301 * <pru_sosend>:ENOMEM[TCP]
1302 * <pru_sosend>:ENOTCONN[AF_INET]
1303 * <pru_sosend>:EOPNOTSUPP
1304 * <pru_sosend>:EPERM[TCP]
1305 * <pru_sosend>:EPIPE
1306 * <pru_sosend>:EWOULDBLOCK
1307 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1308 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1309 * <pru_sosend>:??? [value from so_error]
1310 * sockargs:???
1311 */
1312 static int
sendit(proc_ref_t p,struct socket * so,user_msghdr_ref_t mp,uio_t uiop,int flags,int32_ref_t retval)1313 sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
1314 int flags, int32_ref_t retval)
1315 {
1316 mbuf_ref_t control = NULL;
1317 struct sockaddr_storage ss;
1318 sockaddr_ref_t to = NULL;
1319 boolean_t want_free = TRUE;
1320 int error;
1321 user_ssize_t len;
1322
1323 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1324
1325 if (mp->msg_name != USER_ADDR_NULL) {
1326 if (mp->msg_namelen > sizeof(ss)) {
1327 error = getsockaddr(so, &to, mp->msg_name,
1328 mp->msg_namelen, TRUE);
1329 } else {
1330 error = getsockaddr_s(so, &ss, mp->msg_name,
1331 mp->msg_namelen, TRUE);
1332 if (error == 0) {
1333 to = SA(&ss);
1334 want_free = FALSE;
1335 }
1336 }
1337 if (error != 0) {
1338 goto out;
1339 }
1340 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1341 }
1342 if (mp->msg_control != USER_ADDR_NULL) {
1343 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1344 error = EINVAL;
1345 goto bad;
1346 }
1347 error = sockargs(&control, mp->msg_control,
1348 mp->msg_controllen, MT_CONTROL);
1349 if (error != 0) {
1350 goto bad;
1351 }
1352 }
1353
1354 #if CONFIG_MACF_SOCKET_SUBSET
1355 /*
1356 * We check the state without holding the socket lock;
1357 * if a race condition occurs, it would simply result
1358 * in an extra call to the MAC check function.
1359 */
1360 if (to != NULL &&
1361 !(so->so_state & SS_DEFUNCT) &&
1362 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1363 if (control != NULL) {
1364 m_freem(control);
1365 }
1366
1367 goto bad;
1368 }
1369 #endif /* MAC_SOCKET_SUBSET */
1370
1371 len = uio_resid(uiop);
1372 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1373 control, flags);
1374 if (error != 0) {
1375 if (uio_resid(uiop) != len && (error == ERESTART ||
1376 error == EINTR || error == EWOULDBLOCK)) {
1377 error = 0;
1378 }
1379 /* Generation of SIGPIPE can be controlled per socket */
1380 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1381 !(flags & MSG_NOSIGNAL)) {
1382 psignal(p, SIGPIPE);
1383 }
1384 }
1385 if (error == 0) {
1386 *retval = (int)(len - uio_resid(uiop));
1387 }
1388 bad:
1389 if (want_free) {
1390 free_sockaddr(to);
1391 }
1392 out:
1393 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1394
1395 return error;
1396 }
1397
/*
 * Cancellable front end for sendto_nocancel(): runs the pthread
 * cancellation test, then passes `uap' (cast to
 * struct sendto_nocancel_args *) on and returns the result.
 *
 * Returns:	0			Success
 *		ENOMEM
 *	sendit:???			[see sendit definition in this file]
 *	write:???			[4056224: applicable for pipes]
 */
int
sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
{
	__pthread_testcancel(1);
	return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
}
1410
1411 int
sendto_nocancel(proc_ref_t p,struct sendto_nocancel_args * uap,int32_ref_t retval)1412 sendto_nocancel(proc_ref_t p,
1413 struct sendto_nocancel_args *uap,
1414 int32_ref_t retval)
1415 {
1416 struct user_msghdr msg;
1417 int error;
1418 uio_t auio = NULL;
1419 socket_ref_t so;
1420
1421 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1422 AUDIT_ARG(fd, uap->s);
1423
1424 if (uap->flags & MSG_SKIPCFIL) {
1425 error = EPERM;
1426 goto done;
1427 }
1428
1429 if (uap->len > LONG_MAX) {
1430 error = EINVAL;
1431 goto done;
1432 }
1433
1434 auio = uio_create(1, 0,
1435 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1436 UIO_WRITE);
1437 if (auio == NULL) {
1438 error = ENOMEM;
1439 goto done;
1440 }
1441 uio_addiov(auio, uap->buf, uap->len);
1442
1443 msg.msg_name = uap->to;
1444 msg.msg_namelen = uap->tolen;
1445 /* no need to set up msg_iov. sendit uses uio_t we send it */
1446 msg.msg_iov = 0;
1447 msg.msg_iovlen = 0;
1448 msg.msg_control = 0;
1449 msg.msg_flags = 0;
1450
1451 error = file_socket(uap->s, &so);
1452 if (error) {
1453 goto done;
1454 }
1455
1456 if (so == NULL) {
1457 error = EBADF;
1458 } else {
1459 error = sendit(p, so, &msg, auio, uap->flags, retval);
1460 }
1461
1462 file_drop(uap->s);
1463 done:
1464 if (auio != NULL) {
1465 uio_free(auio);
1466 }
1467
1468 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1469
1470 return error;
1471 }
1472
/*
 * Cancellable front end for sendmsg_nocancel(): runs the pthread
 * cancellation test, then passes `uap' (cast to
 * struct sendmsg_nocancel_args *) on and returns the result.
 *
 * Returns:	0			Success
 *		ENOBUFS
 *	copyin:EFAULT
 *	sendit:???			[see sendit definition in this file]
 */
int
sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
{
	__pthread_testcancel(1);
	return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
	           retval);
}
1486
1487 int
sendmsg_nocancel(proc_ref_t p,struct sendmsg_nocancel_args * uap,int32_ref_t retval)1488 sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1489 int32_ref_t retval)
1490 {
1491 struct user32_msghdr msg32;
1492 struct user64_msghdr msg64;
1493 struct user_msghdr user_msg;
1494 caddr_t msghdrp;
1495 int size_of_msghdr;
1496 int error;
1497 uio_t auio = NULL;
1498 struct user_iovec *iovp;
1499 socket_ref_t so;
1500
1501 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1502
1503 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1504 AUDIT_ARG(fd, uap->s);
1505
1506 if (uap->flags & MSG_SKIPCFIL) {
1507 error = EPERM;
1508 goto done;
1509 }
1510
1511 if (is_p_64bit_process) {
1512 msghdrp = (caddr_t)&msg64;
1513 size_of_msghdr = sizeof(msg64);
1514 } else {
1515 msghdrp = (caddr_t)&msg32;
1516 size_of_msghdr = sizeof(msg32);
1517 }
1518 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1519 if (error) {
1520 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1521 return error;
1522 }
1523
1524 if (is_p_64bit_process) {
1525 user_msg.msg_flags = msg64.msg_flags;
1526 user_msg.msg_controllen = msg64.msg_controllen;
1527 user_msg.msg_control = (user_addr_t)msg64.msg_control;
1528 user_msg.msg_iovlen = msg64.msg_iovlen;
1529 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1530 user_msg.msg_namelen = msg64.msg_namelen;
1531 user_msg.msg_name = (user_addr_t)msg64.msg_name;
1532 } else {
1533 user_msg.msg_flags = msg32.msg_flags;
1534 user_msg.msg_controllen = msg32.msg_controllen;
1535 user_msg.msg_control = msg32.msg_control;
1536 user_msg.msg_iovlen = msg32.msg_iovlen;
1537 user_msg.msg_iov = msg32.msg_iov;
1538 user_msg.msg_namelen = msg32.msg_namelen;
1539 user_msg.msg_name = msg32.msg_name;
1540 }
1541
1542 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1543 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1544 0, 0, 0, 0);
1545 return EMSGSIZE;
1546 }
1547
1548 /* allocate a uio large enough to hold the number of iovecs passed */
1549 auio = uio_create(user_msg.msg_iovlen, 0,
1550 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1551 UIO_WRITE);
1552 if (auio == NULL) {
1553 error = ENOBUFS;
1554 goto done;
1555 }
1556
1557 if (user_msg.msg_iovlen) {
1558 /*
1559 * get location of iovecs within the uio.
1560 * then copyin the iovecs from user space.
1561 */
1562 iovp = uio_iovsaddr_user(auio);
1563 if (iovp == NULL) {
1564 error = ENOBUFS;
1565 goto done;
1566 }
1567 error = copyin_user_iovec_array(user_msg.msg_iov,
1568 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1569 user_msg.msg_iovlen, iovp);
1570 if (error) {
1571 goto done;
1572 }
1573 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1574
1575 /* finish setup of uio_t */
1576 error = uio_calculateresid_user(auio);
1577 if (error) {
1578 goto done;
1579 }
1580 } else {
1581 user_msg.msg_iov = 0;
1582 }
1583
1584 /* msg_flags is ignored for send */
1585 user_msg.msg_flags = 0;
1586
1587 error = file_socket(uap->s, &so);
1588 if (error) {
1589 goto done;
1590 }
1591 if (so == NULL) {
1592 error = EBADF;
1593 } else {
1594 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1595 }
1596 file_drop(uap->s);
1597 done:
1598 if (auio != NULL) {
1599 uio_free(auio);
1600 }
1601 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1602
1603 return error;
1604 }
1605
1606 static int
internalize_user_msg_x(struct user_msghdr * user_msg,uio_t * auiop,proc_ref_t p,void_ptr_t user_msghdr_x_src)1607 internalize_user_msg_x(struct user_msghdr *user_msg, uio_t *auiop, proc_ref_t p, void_ptr_t user_msghdr_x_src)
1608 {
1609 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1610 uio_t auio = *auiop;
1611 int error;
1612
1613 if (is_p_64bit_process) {
1614 struct user64_msghdr_x msghdrx64;
1615
1616 error = copyin((user_addr_t)user_msghdr_x_src,
1617 &msghdrx64, sizeof(msghdrx64));
1618 if (error != 0) {
1619 DBG_PRINTF("%s copyin() msghdrx64 failed %d",
1620 __func__, error);
1621 goto done;
1622 }
1623 user_msg->msg_name = msghdrx64.msg_name;
1624 user_msg->msg_namelen = msghdrx64.msg_namelen;
1625 user_msg->msg_iov = msghdrx64.msg_iov;
1626 user_msg->msg_iovlen = msghdrx64.msg_iovlen;
1627 user_msg->msg_control = msghdrx64.msg_control;
1628 user_msg->msg_controllen = msghdrx64.msg_controllen;
1629 } else {
1630 struct user32_msghdr_x msghdrx32;
1631
1632 error = copyin((user_addr_t)user_msghdr_x_src,
1633 &msghdrx32, sizeof(msghdrx32));
1634 if (error != 0) {
1635 DBG_PRINTF("%s copyin() msghdrx32 failed %d",
1636 __func__, error);
1637 goto done;
1638 }
1639 user_msg->msg_name = msghdrx32.msg_name;
1640 user_msg->msg_namelen = msghdrx32.msg_namelen;
1641 user_msg->msg_iov = msghdrx32.msg_iov;
1642 user_msg->msg_iovlen = msghdrx32.msg_iovlen;
1643 user_msg->msg_control = msghdrx32.msg_control;
1644 user_msg->msg_controllen = msghdrx32.msg_controllen;
1645 }
1646 /* msg_flags is ignored for send */
1647 user_msg->msg_flags = 0;
1648
1649 if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
1650 error = EMSGSIZE;
1651 DBG_PRINTF("%s bad msg_iovlen, error %d",
1652 __func__, error);
1653 goto done;
1654 }
1655 /*
1656 * Attempt to reuse the uio if large enough, otherwise we need
1657 * a new one
1658 */
1659 if (auio != NULL) {
1660 if (auio->uio_max_iovs >= user_msg->msg_iovlen) {
1661 uio_reset_fast(auio, 0,
1662 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1663 UIO_WRITE);
1664 } else {
1665 uio_free(auio);
1666 auio = NULL;
1667 }
1668 }
1669 if (auio == NULL) {
1670 auio = uio_create(user_msg->msg_iovlen, 0,
1671 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1672 UIO_WRITE);
1673 if (auio == NULL) {
1674 error = ENOBUFS;
1675 DBG_PRINTF("%s uio_create() failed %d",
1676 __func__, error);
1677 goto done;
1678 }
1679 }
1680
1681 if (user_msg->msg_iovlen) {
1682 /*
1683 * get location of iovecs within the uio.
1684 * then copyin the iovecs from user space.
1685 */
1686 struct user_iovec *iovp = uio_iovsaddr_user(auio);
1687 if (iovp == NULL) {
1688 error = ENOBUFS;
1689 goto done;
1690 }
1691 error = copyin_user_iovec_array(user_msg->msg_iov,
1692 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1693 user_msg->msg_iovlen, iovp);
1694 if (error != 0) {
1695 goto done;
1696 }
1697 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
1698
1699 /* finish setup of uio_t */
1700 error = uio_calculateresid_user(auio);
1701 if (error) {
1702 goto done;
1703 }
1704 } else {
1705 user_msg->msg_iov = 0;
1706 }
1707
1708 done:
1709 *auiop = auio;
1710 return error;
1711 }
1712
1713 static int
mbuf_packet_from_uio(socket_ref_t so,mbuf_ref_ref_t mp,uio_t auio)1714 mbuf_packet_from_uio(socket_ref_t so, mbuf_ref_ref_t mp, uio_t auio)
1715 {
1716 int error = 0;
1717 uint16_t headroom = 0;
1718 size_t bytes_to_alloc;
1719 mbuf_ref_t top = NULL, m;
1720
1721 if (soreserveheadroom != 0) {
1722 headroom = so->so_pktheadroom;
1723 }
1724 bytes_to_alloc = headroom + uio_resid(auio);
1725
1726 error = mbuf_allocpacket(MBUF_WAITOK, bytes_to_alloc, NULL, &top);
1727 if (error != 0) {
1728 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: mbuf_allocpacket %zu error %d",
1729 bytes_to_alloc, error);
1730 goto done;
1731 }
1732
1733 if (headroom > 0 && headroom < mbuf_maxlen(top)) {
1734 top->m_data += headroom;
1735 }
1736
1737 for (m = top; m != NULL; m = m->m_next) {
1738 int bytes_to_copy = (int)uio_resid(auio);
1739 ssize_t mlen;
1740
1741 if ((m->m_flags & M_EXT)) {
1742 mlen = m->m_ext.ext_size -
1743 M_LEADINGSPACE(m);
1744 } else if ((m->m_flags & M_PKTHDR)) {
1745 mlen = MHLEN - M_LEADINGSPACE(m);
1746 m_add_crumb(m, PKT_CRUMB_SOSEND);
1747 } else {
1748 mlen = MLEN - M_LEADINGSPACE(m);
1749 }
1750 int len = imin((int)mlen, bytes_to_copy);
1751
1752 error = uio_copyin_user(mtod(m, caddr_t), (int)len, auio);
1753 if (error != 0) {
1754 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
1755 len, error);
1756 goto done;
1757 }
1758 m->m_len = len;
1759 top->m_pkthdr.len += len;
1760 }
1761
1762 done:
1763 if (error != 0) {
1764 m_freem(top);
1765 } else {
1766 *mp = top;
1767 }
1768 return error;
1769 }
1770
1771 static int
sendit_x(proc_ref_t p,socket_ref_t so,struct sendmsg_x_args * uap,u_int * retval)1772 sendit_x(proc_ref_t p, socket_ref_t so, struct sendmsg_x_args *uap, u_int *retval)
1773 {
1774 int error = 0;
1775 uio_t __single auio = NULL;
1776 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1777 void *src;
1778 MBUFQ_HEAD() pktlist = {};
1779 size_t total_pkt_len = 0;
1780 u_int pkt_cnt = 0;
1781 int flags = uap->flags;
1782 mbuf_ref_t top;
1783
1784 MBUFQ_INIT(&pktlist);
1785
1786 *retval = 0;
1787
1788 /* We re-use the uio when possible */
1789 auio = uio_create(1, 0,
1790 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1791 UIO_WRITE);
1792 if (auio == NULL) {
1793 error = ENOBUFS;
1794 DBG_PRINTF("%s uio_create() failed %d",
1795 __func__, error);
1796 goto done;
1797 }
1798
1799 src = __unsafe_forge_bidi_indexable(void *, uap->msgp, uap->cnt);
1800
1801 /*
1802 * Create a list of packets
1803 */
1804 for (u_int i = 0; i < uap->cnt; i++) {
1805 struct user_msghdr user_msg = {};
1806 mbuf_ref_t m = NULL;
1807
1808 if (is_p_64bit_process) {
1809 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
1810 if (error != 0) {
1811 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1812 goto done;
1813 }
1814 } else {
1815 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
1816 if (error != 0) {
1817 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1818 goto done;
1819 }
1820 }
1821 /*
1822 * Stop on the first datagram that is too large
1823 */
1824 if (uio_resid(auio) > so->so_snd.sb_hiwat) {
1825 if (i == 0) {
1826 error = EMSGSIZE;
1827 goto done;
1828 }
1829 break;
1830 }
1831 /*
1832 * An mbuf packet has the control mbuf(s) followed by data
1833 * We allocate the mbufs in reverse order
1834 */
1835 error = mbuf_packet_from_uio(so, &m, auio);
1836 if (error != 0) {
1837 os_log(OS_LOG_DEFAULT, "sendit_x: mbuf_packet_from_uio error %d\n", error);
1838 goto done;
1839 }
1840 total_pkt_len += m->m_pkthdr.len;
1841
1842 if (user_msg.msg_control != USER_ADDR_NULL && user_msg.msg_controllen != 0) {
1843 mbuf_ref_t control = NULL;
1844
1845 error = sockargs(&control, user_msg.msg_control, user_msg.msg_controllen, MT_CONTROL);
1846 if (error != 0) {
1847 os_log(OS_LOG_DEFAULT, "sendit_x: sockargs error %d\n", error);
1848 goto done;
1849 }
1850 control->m_next = m;
1851 m = control;
1852 }
1853 MBUFQ_ENQUEUE(&pktlist, m);
1854
1855 pkt_cnt += 1;
1856 }
1857
1858 top = MBUFQ_FIRST(&pktlist);
1859 MBUFQ_INIT(&pktlist);
1860 error = sosend_list(so, top, total_pkt_len, &pkt_cnt, flags);
1861 if (error != 0 && error != ENOBUFS) {
1862 os_log(OS_LOG_DEFAULT, "sendit_x: sosend_list error %d\n", error);
1863 }
1864 done:
1865 *retval = pkt_cnt;
1866
1867 if (auio != NULL) {
1868 uio_free(auio);
1869 }
1870 MBUFQ_DRAIN(&pktlist);
1871 return error;
1872 }
1873
1874 int
sendmsg_x(proc_ref_t p,struct sendmsg_x_args * uap,user_ssize_t * retval)1875 sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1876 {
1877 void *src;
1878 int error;
1879 uio_t __single auio = NULL;
1880 socket_ref_t so;
1881 u_int uiocnt = 0;
1882 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1883
1884 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1885 AUDIT_ARG(fd, uap->s);
1886
1887 if (uap->flags & MSG_SKIPCFIL) {
1888 error = EPERM;
1889 goto done_no_filedrop;
1890 }
1891
1892 error = file_socket(uap->s, &so);
1893 if (error) {
1894 goto done_no_filedrop;
1895 }
1896 if (so == NULL) {
1897 error = EBADF;
1898 goto done;
1899 }
1900
1901 /*
1902 * For an atomic datagram connected socket we can build the list of
1903 * mbuf packets with sosend_list()
1904 */
1905 if (so->so_type == SOCK_DGRAM && sosendallatonce(so) &&
1906 (so->so_state & SS_ISCONNECTED) && sendmsg_x_mode != 1) {
1907 error = sendit_x(p, so, uap, &uiocnt);
1908 if (error != 0) {
1909 DBG_PRINTF("%s sendit_x() failed %d",
1910 __func__, error);
1911 }
1912 goto done;
1913 }
1914
1915 src = __unsafe_forge_bidi_indexable(void *, uap->msgp, uap->cnt);
1916
1917 /* We re-use the uio when possible */
1918 auio = uio_create(1, 0,
1919 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1920 UIO_WRITE);
1921 if (auio == NULL) {
1922 error = ENOBUFS;
1923 DBG_PRINTF("%s uio_create() failed %d",
1924 __func__, error);
1925 goto done;
1926 }
1927
1928 for (u_int i = 0; i < uap->cnt; i++) {
1929 struct user_msghdr user_msg = {};
1930
1931 if (is_p_64bit_process) {
1932 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user64_msghdr_x *)src) + i);
1933 if (error != 0) {
1934 goto done;
1935 }
1936 } else {
1937 error = internalize_user_msg_x(&user_msg, &auio, p, ((struct user32_msghdr_x *)src) + i);
1938 if (error != 0) {
1939 goto done;
1940 }
1941 }
1942
1943 int32_t len = 0;
1944 error = sendit(p, so, &user_msg, auio, uap->flags, &len);
1945 if (error != 0) {
1946 break;
1947 }
1948 uiocnt += 1;
1949 }
1950 done:
1951 if (error != 0) {
1952 if (uiocnt != 0 && (error == ERESTART ||
1953 error == EINTR || error == EWOULDBLOCK ||
1954 error == ENOBUFS || error == EMSGSIZE)) {
1955 error = 0;
1956 }
1957 /* Generation of SIGPIPE can be controlled per socket */
1958 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1959 !(uap->flags & MSG_NOSIGNAL)) {
1960 psignal(p, SIGPIPE);
1961 }
1962 }
1963 if (error == 0) {
1964 *retval = (int)(uiocnt);
1965 }
1966 file_drop(uap->s);
1967
1968 done_no_filedrop:
1969 if (auio != NULL) {
1970 uio_free(auio);
1971 }
1972 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1973
1974 return error;
1975 }
1976
1977
1978 static int
copyout_sa(sockaddr_ref_t fromsa,user_addr_t name,socklen_t * namelen)1979 copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
1980 {
1981 int error = 0;
1982 socklen_t sa_len = 0;
1983 ssize_t len;
1984
1985 len = *namelen;
1986 if (len <= 0 || fromsa == 0) {
1987 len = 0;
1988 } else {
1989 #ifndef MIN
1990 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1991 #endif
1992 sa_len = fromsa->sa_len;
1993 len = MIN((unsigned int)len, sa_len);
1994 error = copyout(__SA_UTILS_CONV_TO_BYTES(fromsa), name, (unsigned)len);
1995 if (error) {
1996 goto out;
1997 }
1998 }
1999 *namelen = sa_len;
2000 out:
2001 return 0;
2002 }
2003
2004 static int
copyout_maddr(struct mbuf * m,user_addr_t name,socklen_t * namelen)2005 copyout_maddr(struct mbuf *m, user_addr_t name, socklen_t *namelen)
2006 {
2007 int error = 0;
2008 socklen_t sa_len = 0;
2009 ssize_t len;
2010
2011 len = *namelen;
2012 if (len <= 0 || m == NULL) {
2013 len = 0;
2014 } else {
2015 #ifndef MIN
2016 #define MIN(a, b) ((a) > (b) ? (b) : (a))
2017 #endif
2018 struct sockaddr *fromsa = mtod(m, struct sockaddr *);
2019
2020 sa_len = fromsa->sa_len;
2021 len = MIN((unsigned int)len, sa_len);
2022 error = copyout(fromsa, name, (unsigned)len);
2023 if (error != 0) {
2024 goto out;
2025 }
2026 }
2027 *namelen = sa_len;
2028 out:
2029 return 0;
2030 }
2031
2032 static int
copyout_control(proc_ref_t p,mbuf_ref_t m,user_addr_t control,socklen_ref_t controllen,int_ref_t flags,socket_ref_t so)2033 copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
2034 socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
2035 {
2036 int error = 0;
2037 socklen_t len;
2038 user_addr_t ctlbuf;
2039 struct inpcb *inp = NULL;
2040 bool want_pktinfo = false;
2041 bool seen_pktinfo = false;
2042
2043 if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
2044 inp = sotoinpcb(so);
2045 want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
2046 }
2047
2048 len = *controllen;
2049 *controllen = 0;
2050 ctlbuf = control;
2051
2052 while (m && len > 0) {
2053 socklen_t tocopy;
2054 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
2055 socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
2056 socklen_t buflen = m->m_len;
2057
2058 while (buflen > 0 && len > 0) {
2059 /*
2060 * SCM_TIMESTAMP hack because struct timeval has a
2061 * different size for 32 bits and 64 bits processes
2062 */
2063 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
2064 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
2065 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
2066 socklen_t tmp_space;
2067 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
2068
2069 tmp_cp->cmsg_level = SOL_SOCKET;
2070 tmp_cp->cmsg_type = SCM_TIMESTAMP;
2071
2072 if (proc_is64bit(p)) {
2073 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
2074
2075 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
2076 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
2077
2078 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
2079 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
2080 } else {
2081 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
2082
2083 tv32->tv_sec = (user32_time_t)tv->tv_sec;
2084 tv32->tv_usec = tv->tv_usec;
2085
2086 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
2087 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
2088 }
2089 if (len >= tmp_space) {
2090 tocopy = tmp_space;
2091 } else {
2092 *flags |= MSG_CTRUNC;
2093 tocopy = len;
2094 }
2095 error = copyout(tmp_buffer, ctlbuf, tocopy);
2096 if (error) {
2097 goto out;
2098 }
2099 } else {
2100 /* If socket has flow tracking and socket did not request address, ignore it */
2101 if (SOFLOW_ENABLED(so) &&
2102 ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
2103 !(inp->inp_flags & INP_RECVDSTADDR)) ||
2104 (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
2105 !(inp->inp_flags & IN6P_PKTINFO)))) {
2106 tocopy = 0;
2107 } else {
2108 if (cp_size > buflen) {
2109 panic("cp_size > buflen, something wrong with alignment!");
2110 }
2111 if (len >= cp_size) {
2112 tocopy = cp_size;
2113 } else {
2114 *flags |= MSG_CTRUNC;
2115 tocopy = len;
2116 }
2117 error = copyout((caddr_t) cp, ctlbuf, tocopy);
2118 if (error) {
2119 goto out;
2120 }
2121 if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
2122 (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
2123 seen_pktinfo = true;
2124 }
2125 }
2126 }
2127
2128
2129 ctlbuf += tocopy;
2130 len -= tocopy;
2131
2132 buflen -= cp_size;
2133 cp = (struct cmsghdr *)(void *)
2134 ((unsigned char *) cp + cp_size);
2135 cp_size = CMSG_ALIGN(cp->cmsg_len);
2136 }
2137
2138 m = m->m_next;
2139 }
2140 *controllen = (socklen_t)(ctlbuf - control);
2141 out:
2142 if (want_pktinfo && !seen_pktinfo) {
2143 missingpktinfo += 1;
2144 #if (DEBUG || DEVELOPMENT)
2145 char pname[MAXCOMLEN];
2146 char local[MAX_IPv6_STR_LEN + 6];
2147 char remote[MAX_IPv6_STR_LEN + 6];
2148
2149 proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
2150 if (inp->inp_vflag & INP_IPV6) {
2151 inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
2152 inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
2153 } else {
2154 inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
2155 inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
2156 }
2157
2158 os_log(OS_LOG_DEFAULT,
2159 "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
2160 local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
2161 pname, so->last_pid, error);
2162 #endif /* (DEBUG || DEVELOPMENT) */
2163 }
2164 return error;
2165 }
2166
2167 /*
2168 * Returns: 0 Success
2169 * ENOTSOCK
2170 * EINVAL
2171 * EBADF
2172 * EACCES Mandatory Access Control failure
2173 * copyout:EFAULT
2174 * fp_lookup:EBADF
2175 * <pru_soreceive>:ENOBUFS
2176 * <pru_soreceive>:ENOTCONN
2177 * <pru_soreceive>:EWOULDBLOCK
2178 * <pru_soreceive>:EFAULT
2179 * <pru_soreceive>:EINTR
2180 * <pru_soreceive>:EBADF
2181 * <pru_soreceive>:EINVAL
2182 * <pru_soreceive>:EMSGSIZE
2183 * <pru_soreceive>:???
2184 *
2185 * Notes: Additional return values from calls through <pru_soreceive>
2186 * depend on protocols other than TCP or AF_UNIX, which are
2187 * documented above.
2188 */
2189 static int
recvit(proc_ref_t p,int s,user_msghdr_ref_t mp,uio_t uiop,user_addr_t namelenp,int32_ref_t retval)2190 recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
2191 user_addr_t namelenp, int32_ref_t retval)
2192 {
2193 ssize_t len;
2194 int error;
2195 mbuf_ref_t control = 0;
2196 socket_ref_t so;
2197 sockaddr_ref_t fromsa = 0;
2198 fileproc_ref_t fp;
2199
2200 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2201 if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
2202 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2203 return error;
2204 }
2205 so = (struct socket *)fp_get_data(fp);
2206
2207 #if CONFIG_MACF_SOCKET_SUBSET
2208 /*
2209 * We check the state without holding the socket lock;
2210 * if a race condition occurs, it would simply result
2211 * in an extra call to the MAC check function.
2212 */
2213 if (!(so->so_state & SS_DEFUNCT) &&
2214 !(so->so_state & SS_ISCONNECTED) &&
2215 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2216 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2217 goto out1;
2218 }
2219 #endif /* MAC_SOCKET_SUBSET */
2220 if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
2221 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
2222 error = EINVAL;
2223 goto out1;
2224 }
2225
2226 len = uio_resid(uiop);
2227 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2228 NULL, mp->msg_control ? &control : NULL,
2229 &mp->msg_flags);
2230 if (fromsa) {
2231 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2232 fromsa);
2233 }
2234 if (error) {
2235 if (uio_resid(uiop) != len && (error == ERESTART ||
2236 error == EINTR || error == EWOULDBLOCK)) {
2237 error = 0;
2238 }
2239 }
2240 if (error) {
2241 goto out;
2242 }
2243
2244 *retval = (int32_t)(len - uio_resid(uiop));
2245
2246 if (mp->msg_name) {
2247 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
2248 if (error) {
2249 goto out;
2250 }
2251 /* return the actual, untruncated address length */
2252 if (namelenp &&
2253 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2254 sizeof(int)))) {
2255 goto out;
2256 }
2257 }
2258
2259 if (mp->msg_control) {
2260 error = copyout_control(p, control, mp->msg_control,
2261 &mp->msg_controllen, &mp->msg_flags, so);
2262 }
2263 out:
2264 free_sockaddr(fromsa);
2265 if (control) {
2266 m_freem(control);
2267 }
2268 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2269 out1:
2270 fp_drop(p, s, fp, 0);
2271 return error;
2272 }
2273
2274 /*
2275 * Returns: 0 Success
2276 * ENOMEM
2277 * copyin:EFAULT
2278 * recvit:???
2279 * read:??? [4056224: applicable for pipes]
2280 *
2281 * Notes: The read entry point is only called as part of support for
2282 * binary backward compatability; new code should use read
2283 * instead of recv or recvfrom when attempting to read data
2284 * from pipes.
2285 *
2286 * For full documentation of the return codes from recvit, see
2287 * the block header for the recvit function.
2288 */
2289 int
recvfrom(proc_ref_t p,struct recvfrom_args * uap,int32_ref_t retval)2290 recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2291 {
2292 __pthread_testcancel(1);
2293 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2294 retval);
2295 }
2296
2297 int
recvfrom_nocancel(proc_ref_t p,struct recvfrom_nocancel_args * uap,int32_ref_t retval)2298 recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2299 int32_ref_t retval)
2300 {
2301 struct user_msghdr msg;
2302 int error;
2303 uio_t __single auio = NULL;
2304
2305 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2306 AUDIT_ARG(fd, uap->s);
2307
2308 if (uap->fromlenaddr) {
2309 error = copyin(uap->fromlenaddr,
2310 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2311 if (error) {
2312 return error;
2313 }
2314 } else {
2315 msg.msg_namelen = 0;
2316 }
2317 msg.msg_name = uap->from;
2318 auio = uio_create(1, 0,
2319 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2320 UIO_READ);
2321 if (auio == NULL) {
2322 return ENOMEM;
2323 }
2324
2325 uio_addiov(auio, uap->buf, uap->len);
2326 /* no need to set up msg_iov. recvit uses uio_t we send it */
2327 msg.msg_iov = 0;
2328 msg.msg_iovlen = 0;
2329 msg.msg_control = 0;
2330 msg.msg_controllen = 0;
2331 msg.msg_flags = uap->flags;
2332 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2333 if (auio != NULL) {
2334 uio_free(auio);
2335 }
2336
2337 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2338
2339 return error;
2340 }
2341
2342 /*
2343 * Returns: 0 Success
2344 * EMSGSIZE
2345 * ENOMEM
2346 * copyin:EFAULT
2347 * copyout:EFAULT
2348 * recvit:???
2349 *
2350 * Notes: For full documentation of the return codes from recvit, see
2351 * the block header for the recvit function.
2352 */
2353 int
recvmsg(proc_ref_t p,struct recvmsg_args * uap,int32_ref_t retval)2354 recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2355 {
2356 __pthread_testcancel(1);
2357 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2358 retval);
2359 }
2360
2361 int
recvmsg_nocancel(proc_ref_t p,struct recvmsg_nocancel_args * uap,int32_ref_t retval)2362 recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2363 int32_ref_t retval)
2364 {
2365 struct user32_msghdr msg32;
2366 struct user64_msghdr msg64;
2367 struct user_msghdr user_msg;
2368 caddr_t msghdrp;
2369 int size_of_msghdr;
2370 user_addr_t uiov;
2371 int error;
2372 uio_t __single auio = NULL;
2373 struct user_iovec *iovp;
2374
2375 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2376
2377 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2378 AUDIT_ARG(fd, uap->s);
2379 if (is_p_64bit_process) {
2380 msghdrp = (caddr_t)&msg64;
2381 size_of_msghdr = sizeof(msg64);
2382 } else {
2383 msghdrp = (caddr_t)&msg32;
2384 size_of_msghdr = sizeof(msg32);
2385 }
2386 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2387 if (error) {
2388 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2389 return error;
2390 }
2391
2392 /* only need to copy if user process is not 64-bit */
2393 if (is_p_64bit_process) {
2394 user_msg.msg_flags = msg64.msg_flags;
2395 user_msg.msg_controllen = msg64.msg_controllen;
2396 user_msg.msg_control = (user_addr_t)msg64.msg_control;
2397 user_msg.msg_iovlen = msg64.msg_iovlen;
2398 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2399 user_msg.msg_namelen = msg64.msg_namelen;
2400 user_msg.msg_name = (user_addr_t)msg64.msg_name;
2401 } else {
2402 user_msg.msg_flags = msg32.msg_flags;
2403 user_msg.msg_controllen = msg32.msg_controllen;
2404 user_msg.msg_control = msg32.msg_control;
2405 user_msg.msg_iovlen = msg32.msg_iovlen;
2406 user_msg.msg_iov = msg32.msg_iov;
2407 user_msg.msg_namelen = msg32.msg_namelen;
2408 user_msg.msg_name = msg32.msg_name;
2409 }
2410
2411 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2412 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2413 0, 0, 0, 0);
2414 return EMSGSIZE;
2415 }
2416
2417 user_msg.msg_flags = uap->flags;
2418
2419 /* allocate a uio large enough to hold the number of iovecs passed */
2420 auio = uio_create(user_msg.msg_iovlen, 0,
2421 (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2422 UIO_READ);
2423 if (auio == NULL) {
2424 error = ENOMEM;
2425 goto done;
2426 }
2427
2428 /*
2429 * get location of iovecs within the uio. then copyin the iovecs from
2430 * user space.
2431 */
2432 iovp = uio_iovsaddr_user(auio);
2433 if (iovp == NULL) {
2434 error = ENOMEM;
2435 goto done;
2436 }
2437 uiov = user_msg.msg_iov;
2438 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2439 error = copyin_user_iovec_array(uiov,
2440 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2441 user_msg.msg_iovlen, iovp);
2442 if (error) {
2443 goto done;
2444 }
2445
2446 /* finish setup of uio_t */
2447 error = uio_calculateresid_user(auio);
2448 if (error) {
2449 goto done;
2450 }
2451
2452 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2453 if (!error) {
2454 user_msg.msg_iov = uiov;
2455 if (is_p_64bit_process) {
2456 msg64.msg_flags = user_msg.msg_flags;
2457 msg64.msg_controllen = user_msg.msg_controllen;
2458 msg64.msg_control = user_msg.msg_control;
2459 msg64.msg_iovlen = user_msg.msg_iovlen;
2460 msg64.msg_iov = user_msg.msg_iov;
2461 msg64.msg_namelen = user_msg.msg_namelen;
2462 msg64.msg_name = user_msg.msg_name;
2463 } else {
2464 msg32.msg_flags = user_msg.msg_flags;
2465 msg32.msg_controllen = user_msg.msg_controllen;
2466 msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2467 msg32.msg_iovlen = user_msg.msg_iovlen;
2468 msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2469 msg32.msg_namelen = user_msg.msg_namelen;
2470 msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2471 }
2472 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2473 }
2474 done:
2475 if (auio != NULL) {
2476 uio_free(auio);
2477 }
2478 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2479 return error;
2480 }
2481
2482 __attribute__((noinline))
2483 static int
recvmsg_x_array(proc_ref_t p,socket_ref_t so,struct recvmsg_x_args * uap,user_ssize_t * retval)2484 recvmsg_x_array(proc_ref_t p, socket_ref_t so, struct recvmsg_x_args *uap, user_ssize_t *retval)
2485 {
2486 int error = EOPNOTSUPP;
2487 user_msghdr_x_ptr_t user_msg_x = NULL;
2488 recv_msg_elem_ptr_t recv_msg_array = NULL;
2489 user_ssize_t len_before = 0, len_after;
2490 size_t size_of_msghdr;
2491 void_ptr_t umsgp = NULL;
2492 u_int i;
2493 u_int uiocnt;
2494 int flags = uap->flags;
2495
2496 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2497
2498 size_of_msghdr = is_p_64bit_process ?
2499 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2500
2501 /*
2502 * Support only a subset of message flags
2503 */
2504 if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | MSG_NBIO)) {
2505 return EOPNOTSUPP;
2506 }
2507 /*
2508 * Input parameter range check
2509 */
2510 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2511 error = EINVAL;
2512 goto out;
2513 }
2514 if (uap->cnt > somaxrecvmsgx) {
2515 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2516 }
2517
2518 user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
2519 Z_WAITOK | Z_ZERO);
2520 if (user_msg_x == NULL) {
2521 DBG_PRINTF("%s user_msg_x alloc failed", __func__);
2522 error = ENOMEM;
2523 goto out;
2524 }
2525 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2526 if (recv_msg_array == NULL) {
2527 DBG_PRINTF("%s alloc_recv_msg_array() failed", __func__);
2528 error = ENOMEM;
2529 goto out;
2530 }
2531
2532 umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2533 if (umsgp == NULL) {
2534 DBG_PRINTF("%s umsgp alloc failed", __func__);
2535 error = ENOMEM;
2536 goto out;
2537 }
2538 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2539 if (error) {
2540 DBG_PRINTF("%s copyin() failed", __func__);
2541 goto out;
2542 }
2543 error = internalize_recv_msghdr_array(umsgp,
2544 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2545 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2546 if (error) {
2547 DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
2548 goto out;
2549 }
2550 /*
2551 * Make sure the size of each message iovec and
2552 * the aggregate size of all the iovec is valid
2553 */
2554 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2555 error = EINVAL;
2556 goto out;
2557 }
2558 /*
2559 * Sanity check on passed arguments
2560 */
2561 for (i = 0; i < uap->cnt; i++) {
2562 struct user_msghdr_x *mp = user_msg_x + i;
2563
2564 if (mp->msg_flags != 0) {
2565 error = EINVAL;
2566 goto out;
2567 }
2568 }
2569 #if CONFIG_MACF_SOCKET_SUBSET
2570 /*
2571 * We check the state without holding the socket lock;
2572 * if a race condition occurs, it would simply result
2573 * in an extra call to the MAC check function.
2574 */
2575 if (!(so->so_state & SS_DEFUNCT) &&
2576 !(so->so_state & SS_ISCONNECTED) &&
2577 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2578 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2579 goto out;
2580 }
2581 #endif /* MAC_SOCKET_SUBSET */
2582
2583 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2584
2585 for (i = 0; i < uap->cnt; i++) {
2586 struct recv_msg_elem *recv_msg_elem;
2587 uio_t auio;
2588 sockaddr_ref_ref_t psa;
2589 struct mbuf **controlp;
2590
2591 recv_msg_elem = recv_msg_array + i;
2592 auio = recv_msg_elem->uio;
2593
2594 /*
2595 * Do not block if we got at least one packet
2596 */
2597 if (i > 0) {
2598 flags |= MSG_DONTWAIT;
2599 }
2600
2601 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2602 &recv_msg_elem->psa : NULL;
2603 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2604 &recv_msg_elem->controlp : NULL;
2605
2606 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2607 auio, NULL, controlp, &flags);
2608 if (error) {
2609 break;
2610 }
2611 /*
2612 * We have some data
2613 */
2614 recv_msg_elem->which |= SOCK_MSG_DATA;
2615 /*
2616 * Set the messages flags for this packet
2617 */
2618 flags &= ~MSG_DONTWAIT;
2619 recv_msg_elem->flags = flags;
2620 /*
2621 * Stop on partial copy
2622 */
2623 if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2624 break;
2625 }
2626 }
2627
2628 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2629
2630 if (error) {
2631 if (len_after != len_before && (error == ERESTART ||
2632 error == EINTR || error == EWOULDBLOCK)) {
2633 error = 0;
2634 } else {
2635 goto out;
2636 }
2637 }
2638
2639 uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2640 uap->cnt, user_msg_x, recv_msg_array, &error);
2641 if (error != 0) {
2642 goto out;
2643 }
2644
2645 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2646 if (error) {
2647 DBG_PRINTF("%s copyout() failed", __func__);
2648 goto out;
2649 }
2650 *retval = (int)(uiocnt);
2651
2652 out:
2653 kfree_data(umsgp, uap->cnt * size_of_msghdr);
2654 free_recv_msg_array(recv_msg_array, uap->cnt);
2655 kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);
2656
2657 return error;
2658 }
2659
2660 int
recvmsg_x(struct proc * p,struct recvmsg_x_args * uap,user_ssize_t * retval)2661 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2662 {
2663 int error = EOPNOTSUPP;
2664 socket_ref_t so;
2665 size_t size_of_msghdrx;
2666 caddr_t msghdrxp;
2667 struct user32_msghdr_x msghdrx32 = {};
2668 struct user64_msghdr_x msghdrx64 = {};
2669 int spacetype;
2670 u_int i;
2671 uio_t auio = NULL;
2672 caddr_t src;
2673 int flags;
2674 mbuf_ref_t pkt_list = NULL, m;
2675 mbuf_ref_t addr_list = NULL, m_addr;
2676 mbuf_ref_t ctl_list = NULL, control;
2677 u_int pktcnt;
2678
2679 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2680
2681 error = file_socket(uap->s, &so);
2682 if (error) {
2683 goto done_no_filedrop;
2684 }
2685 if (so == NULL) {
2686 error = EBADF;
2687 goto done;
2688 }
2689
2690 #if CONFIG_MACF_SOCKET_SUBSET
2691 /*
2692 * We check the state without holding the socket lock;
2693 * if a race condition occurs, it would simply result
2694 * in an extra call to the MAC check function.
2695 */
2696 if (!(so->so_state & SS_DEFUNCT) &&
2697 !(so->so_state & SS_ISCONNECTED) &&
2698 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2699 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2700 goto done;
2701 }
2702 #endif /* MAC_SOCKET_SUBSET */
2703
2704 /*
2705 * With soreceive_m_list, all packets must be uniform, with address and
2706 * control as they are returned in parallel lists and it's only guaranteed
2707 * when pru_send_list is supported
2708 */
2709 if (do_recvmsg_x_donttrunc != 0 || (so->so_options & SO_DONTTRUNC)) {
2710 error = recvmsg_x_array(p, so, uap, retval);
2711 goto done;
2712 }
2713
2714 /*
2715 * Input parameter range check
2716 */
2717 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2718 error = EINVAL;
2719 goto done;
2720 }
2721 if (uap->cnt > somaxrecvmsgx) {
2722 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2723 }
2724
2725 if (IS_64BIT_PROCESS(p)) {
2726 msghdrxp = (caddr_t)&msghdrx64;
2727 size_of_msghdrx = sizeof(struct user64_msghdr_x);
2728 spacetype = UIO_USERSPACE64;
2729 } else {
2730 msghdrxp = (caddr_t)&msghdrx32;
2731 size_of_msghdrx = sizeof(struct user32_msghdr_x);
2732 spacetype = UIO_USERSPACE32;
2733 }
2734 src = __unsafe_forge_bidi_indexable(caddr_t, uap->msgp, uap->cnt);
2735
2736 flags = uap->flags;
2737
2738 /*
2739 * Only allow MSG_DONTWAIT
2740 */
2741 if ((flags & ~(MSG_DONTWAIT | MSG_NBIO)) != 0) {
2742 error = EINVAL;
2743 goto done;
2744 }
2745
2746 /*
2747 * Receive list of packet in a single call
2748 */
2749 pktcnt = uap->cnt;
2750 error = soreceive_m_list(so, &pktcnt, &addr_list, &pkt_list, &ctl_list,
2751 &flags);
2752 if (error != 0) {
2753 if (pktcnt != 0 && (error == ERESTART ||
2754 error == EINTR || error == EWOULDBLOCK)) {
2755 error = 0;
2756 } else {
2757 goto done;
2758 }
2759 }
2760
2761 m_addr = addr_list;
2762 m = pkt_list;
2763 control = ctl_list;
2764
2765 for (i = 0; i < pktcnt; i++) {
2766 struct user_msghdr user_msg;
2767 ssize_t len;
2768 struct user_iovec *iovp;
2769 struct mbuf *n;
2770
2771 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
2772 panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
2773 }
2774
2775 error = copyin((user_addr_t)(src + i * size_of_msghdrx),
2776 msghdrxp, size_of_msghdrx);
2777 if (error) {
2778 DBG_PRINTF("%s copyin() msghdrx failed %d\n",
2779 __func__, error);
2780 goto done;
2781 }
2782 if (spacetype == UIO_USERSPACE64) {
2783 user_msg.msg_name = msghdrx64.msg_name;
2784 user_msg.msg_namelen = msghdrx64.msg_namelen;
2785 user_msg.msg_iov = msghdrx64.msg_iov;
2786 user_msg.msg_iovlen = msghdrx64.msg_iovlen;
2787 user_msg.msg_control = msghdrx64.msg_control;
2788 user_msg.msg_controllen = msghdrx64.msg_controllen;
2789 } else {
2790 user_msg.msg_name = msghdrx32.msg_name;
2791 user_msg.msg_namelen = msghdrx32.msg_namelen;
2792 user_msg.msg_iov = msghdrx32.msg_iov;
2793 user_msg.msg_iovlen = msghdrx32.msg_iovlen;
2794 user_msg.msg_control = msghdrx32.msg_control;
2795 user_msg.msg_controllen = msghdrx32.msg_controllen;
2796 }
2797 user_msg.msg_flags = 0;
2798 if (user_msg.msg_iovlen <= 0 ||
2799 user_msg.msg_iovlen > UIO_MAXIOV) {
2800 error = EMSGSIZE;
2801 DBG_PRINTF("%s bad msg_iovlen, error %d\n",
2802 __func__, error);
2803 goto done;
2804 }
2805 /*
2806 * Attempt to reuse the uio if large enough, otherwise we need
2807 * a new one
2808 */
2809 if (auio != NULL) {
2810 if (auio->uio_max_iovs <= user_msg.msg_iovlen) {
2811 uio_reset_fast(auio, 0, spacetype, UIO_READ);
2812 } else {
2813 uio_free(auio);
2814 auio = NULL;
2815 }
2816 }
2817 if (auio == NULL) {
2818 auio = uio_create(user_msg.msg_iovlen, 0, spacetype,
2819 UIO_READ);
2820 if (auio == NULL) {
2821 error = ENOBUFS;
2822 DBG_PRINTF("%s uio_create() failed %d\n",
2823 __func__, error);
2824 goto done;
2825 }
2826 }
2827 /*
2828 * get location of iovecs within the uio then copy the iovecs
2829 * from user space.
2830 */
2831 iovp = uio_iovsaddr_user(auio);
2832 if (iovp == NULL) {
2833 error = ENOMEM;
2834 DBG_PRINTF("%s uio_iovsaddr() failed %d\n",
2835 __func__, error);
2836 goto done;
2837 }
2838 error = copyin_user_iovec_array(user_msg.msg_iov,
2839 spacetype, user_msg.msg_iovlen, iovp);
2840 if (error != 0) {
2841 DBG_PRINTF("%s copyin_user_iovec_array() failed %d\n",
2842 __func__, error);
2843 goto done;
2844 }
2845 error = uio_calculateresid_user(auio);
2846 if (error != 0) {
2847 DBG_PRINTF("%s uio_calculateresid() failed %d\n",
2848 __func__, error);
2849 goto done;
2850 }
2851 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2852
2853 len = uio_resid(auio);
2854 for (n = m; n != NULL; n = n->m_next) {
2855 user_ssize_t resid = uio_resid(auio);
2856 if (resid < n->m_len) {
2857 error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2858 if (error != 0) {
2859 DBG_PRINTF("%s uiomove() failed\n",
2860 __func__);
2861 goto done;
2862 }
2863 flags |= MSG_TRUNC;
2864 break;
2865 }
2866
2867 error = uio_copyout_user(mtod(n, caddr_t), (int)n->m_len, auio);
2868 if (error != 0) {
2869 DBG_PRINTF("%s uiomove() failed\n",
2870 __func__);
2871 goto done;
2872 }
2873 }
2874 len -= uio_resid(auio);
2875
2876 if (user_msg.msg_name != 0 && user_msg.msg_namelen != 0) {
2877 error = copyout_maddr(m_addr, user_msg.msg_name,
2878 &user_msg.msg_namelen);
2879 if (error) {
2880 DBG_PRINTF("%s copyout_maddr() failed\n",
2881 __func__);
2882 goto done;
2883 }
2884 }
2885 if (user_msg.msg_control != 0 && user_msg.msg_controllen != 0) {
2886 error = copyout_control(p, control,
2887 user_msg.msg_control, &user_msg.msg_controllen,
2888 &user_msg.msg_flags, so);
2889 if (error) {
2890 DBG_PRINTF("%s copyout_control() failed\n",
2891 __func__);
2892 goto done;
2893 }
2894 }
2895 /*
2896 * Note: the original msg_iovlen and msg_iov do not change
2897 */
2898 if (spacetype == UIO_USERSPACE64) {
2899 msghdrx64.msg_flags = user_msg.msg_flags;
2900 msghdrx64.msg_controllen = user_msg.msg_controllen;
2901 msghdrx64.msg_control = user_msg.msg_control;
2902 msghdrx64.msg_namelen = user_msg.msg_namelen;
2903 msghdrx64.msg_name = user_msg.msg_name;
2904 msghdrx64.msg_datalen = len;
2905 } else {
2906 msghdrx32.msg_flags = user_msg.msg_flags;
2907 msghdrx32.msg_controllen = user_msg.msg_controllen;
2908 msghdrx32.msg_control = (user32_addr_t) user_msg.msg_control;
2909 msghdrx32.msg_name = user_msg.msg_namelen;
2910 msghdrx32.msg_name = (user32_addr_t) user_msg.msg_name;
2911 msghdrx32.msg_datalen = (user32_size_t) len;
2912 }
2913 error = copyout(msghdrxp,
2914 (user_addr_t)(src + i * size_of_msghdrx),
2915 size_of_msghdrx);
2916 if (error) {
2917 DBG_PRINTF("%s copyout() msghdrx failed\n", __func__);
2918 goto done;
2919 }
2920
2921 m = m->m_nextpkt;
2922 if (control != NULL) {
2923 control = control->m_nextpkt;
2924 }
2925 if (m_addr != NULL) {
2926 m_addr = m_addr->m_nextpkt;
2927 }
2928 }
2929
2930 uap->flags = flags;
2931
2932 *retval = (int)i;
2933 done:
2934 file_drop(uap->s);
2935
2936 done_no_filedrop:
2937 if (pkt_list != NULL) {
2938 m_freem_list(pkt_list);
2939 }
2940 if (addr_list != NULL) {
2941 m_freem_list(addr_list);
2942 }
2943 if (ctl_list != NULL) {
2944 m_freem_list(ctl_list);
2945 }
2946 if (auio != NULL) {
2947 uio_free(auio);
2948 }
2949
2950 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2951
2952 return error;
2953 }
2954
2955 /*
2956 * Returns: 0 Success
2957 * EBADF
2958 * file_socket:ENOTSOCK
2959 * file_socket:EBADF
2960 * soshutdown:EINVAL
2961 * soshutdown:ENOTCONN
2962 * soshutdown:EADDRNOTAVAIL[TCP]
2963 * soshutdown:ENOBUFS[TCP]
2964 * soshutdown:EMSGSIZE[TCP]
2965 * soshutdown:EHOSTUNREACH[TCP]
2966 * soshutdown:ENETUNREACH[TCP]
2967 * soshutdown:ENETDOWN[TCP]
2968 * soshutdown:ENOMEM[TCP]
2969 * soshutdown:EACCES[TCP]
2970 * soshutdown:EMSGSIZE[TCP]
2971 * soshutdown:ENOBUFS[TCP]
2972 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2973 * soshutdown:??? [other protocol families]
2974 */
2975 /* ARGSUSED */
2976 int
shutdown(__unused proc_ref_t p,struct shutdown_args * uap,__unused int32_ref_t retval)2977 shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
2978 __unused int32_ref_t retval)
2979 {
2980 socket_ref_t so;
2981 int error;
2982
2983 AUDIT_ARG(fd, uap->s);
2984 error = file_socket(uap->s, &so);
2985 if (error) {
2986 return error;
2987 }
2988 if (so == NULL) {
2989 error = EBADF;
2990 goto out;
2991 }
2992 error = soshutdown((struct socket *)so, uap->how);
2993 out:
2994 file_drop(uap->s);
2995 return error;
2996 }
2997
2998 /*
2999 * Returns: 0 Success
3000 * EFAULT
3001 * EINVAL
3002 * EACCES Mandatory Access Control failure
3003 * file_socket:ENOTSOCK
3004 * file_socket:EBADF
3005 * sosetopt:EINVAL
3006 * sosetopt:ENOPROTOOPT
3007 * sosetopt:ENOBUFS
3008 * sosetopt:EDOM
3009 * sosetopt:EFAULT
3010 * sosetopt:EOPNOTSUPP[AF_UNIX]
3011 * sosetopt:???
3012 */
3013 /* ARGSUSED */
3014 int
setsockopt(proc_ref_t p,setsockopt_args_ref_t uap,__unused int32_ref_t retval)3015 setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
3016 __unused int32_ref_t retval)
3017 {
3018 socket_ref_t so;
3019 struct sockopt sopt;
3020 int error;
3021
3022 AUDIT_ARG(fd, uap->s);
3023 if (uap->val == 0 && uap->valsize != 0) {
3024 return EFAULT;
3025 }
3026 /* No bounds checking on size (it's unsigned) */
3027
3028 error = file_socket(uap->s, &so);
3029 if (error) {
3030 return error;
3031 }
3032
3033 sopt.sopt_dir = SOPT_SET;
3034 sopt.sopt_level = uap->level;
3035 sopt.sopt_name = uap->name;
3036 sopt.sopt_val = uap->val;
3037 sopt.sopt_valsize = uap->valsize;
3038 sopt.sopt_p = p;
3039
3040 if (so == NULL) {
3041 error = EINVAL;
3042 goto out;
3043 }
3044 #if CONFIG_MACF_SOCKET_SUBSET
3045 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
3046 &sopt)) != 0) {
3047 goto out;
3048 }
3049 #endif /* MAC_SOCKET_SUBSET */
3050 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
3051 out:
3052 file_drop(uap->s);
3053 return error;
3054 }
3055
3056 /*
3057 * Returns: 0 Success
3058 * EINVAL
3059 * EBADF
3060 * EACCES Mandatory Access Control failure
3061 * copyin:EFAULT
3062 * copyout:EFAULT
3063 * file_socket:ENOTSOCK
3064 * file_socket:EBADF
3065 * sogetopt:???
3066 */
3067 int
getsockopt(proc_ref_t p,struct getsockopt_args * uap,__unused int32_ref_t retval)3068 getsockopt(proc_ref_t p, struct getsockopt_args *uap,
3069 __unused int32_ref_t retval)
3070 {
3071 int error;
3072 socklen_t valsize;
3073 struct sockopt sopt;
3074 socket_ref_t so;
3075
3076 error = file_socket(uap->s, &so);
3077 if (error) {
3078 return error;
3079 }
3080 if (uap->val) {
3081 error = copyin(uap->avalsize, (caddr_t)&valsize,
3082 sizeof(valsize));
3083 if (error) {
3084 goto out;
3085 }
3086 /* No bounds checking on size (it's unsigned) */
3087 } else {
3088 valsize = 0;
3089 }
3090 sopt.sopt_dir = SOPT_GET;
3091 sopt.sopt_level = uap->level;
3092 sopt.sopt_name = uap->name;
3093 sopt.sopt_val = uap->val;
3094 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
3095 sopt.sopt_p = p;
3096
3097 if (so == NULL) {
3098 error = EBADF;
3099 goto out;
3100 }
3101 #if CONFIG_MACF_SOCKET_SUBSET
3102 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
3103 &sopt)) != 0) {
3104 goto out;
3105 }
3106 #endif /* MAC_SOCKET_SUBSET */
3107 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
3108 if (error == 0) {
3109 valsize = (socklen_t)sopt.sopt_valsize;
3110 error = copyout((caddr_t)&valsize, uap->avalsize,
3111 sizeof(valsize));
3112 }
3113 out:
3114 file_drop(uap->s);
3115 return error;
3116 }
3117
3118
3119 /*
3120 * Get socket name.
3121 *
3122 * Returns: 0 Success
3123 * EBADF
3124 * file_socket:ENOTSOCK
3125 * file_socket:EBADF
3126 * copyin:EFAULT
3127 * copyout:EFAULT
3128 * <pru_sockaddr>:ENOBUFS[TCP]
3129 * <pru_sockaddr>:ECONNRESET[TCP]
3130 * <pru_sockaddr>:EINVAL[AF_UNIX]
3131 * <sf_getsockname>:???
3132 */
3133 /* ARGSUSED */
3134 int
getsockname(__unused proc_ref_t p,struct getsockname_args * uap,__unused int32_ref_t retval)3135 getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
3136 __unused int32_ref_t retval)
3137 {
3138 socket_ref_t so;
3139 sockaddr_ref_t sa;
3140 socklen_t len;
3141 socklen_t sa_len;
3142 int error;
3143
3144 error = file_socket(uap->fdes, &so);
3145 if (error) {
3146 return error;
3147 }
3148 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3149 if (error) {
3150 goto out;
3151 }
3152 if (so == NULL) {
3153 error = EBADF;
3154 goto out;
3155 }
3156 sa = 0;
3157 socket_lock(so, 1);
3158 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
3159 if (error == 0) {
3160 error = sflt_getsockname(so, &sa);
3161 if (error == EJUSTRETURN) {
3162 error = 0;
3163 }
3164 }
3165 socket_unlock(so, 1);
3166 if (error) {
3167 goto bad;
3168 }
3169 if (sa == 0) {
3170 len = 0;
3171 goto gotnothing;
3172 }
3173
3174 sa_len = sa->sa_len;
3175 len = MIN(len, sa_len);
3176 error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->asa, len);
3177 if (error) {
3178 goto bad;
3179 }
3180 /* return the actual, untruncated address length */
3181 len = sa_len;
3182 gotnothing:
3183 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3184 bad:
3185 free_sockaddr(sa);
3186 out:
3187 file_drop(uap->fdes);
3188 return error;
3189 }
3190
3191 /*
3192 * Get name of peer for connected socket.
3193 *
3194 * Returns: 0 Success
3195 * EBADF
3196 * EINVAL
3197 * ENOTCONN
3198 * file_socket:ENOTSOCK
3199 * file_socket:EBADF
3200 * copyin:EFAULT
3201 * copyout:EFAULT
3202 * <pru_peeraddr>:???
3203 * <sf_getpeername>:???
3204 */
3205 /* ARGSUSED */
3206 int
getpeername(__unused proc_ref_t p,struct getpeername_args * uap,__unused int32_ref_t retval)3207 getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
3208 __unused int32_ref_t retval)
3209 {
3210 socket_ref_t so;
3211 sockaddr_ref_t sa;
3212 socklen_t len;
3213 socklen_t sa_len;
3214 int error;
3215
3216 error = file_socket(uap->fdes, &so);
3217 if (error) {
3218 return error;
3219 }
3220 if (so == NULL) {
3221 error = EBADF;
3222 goto out;
3223 }
3224
3225 socket_lock(so, 1);
3226
3227 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
3228 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
3229 /* the socket has been shutdown, no more getpeername's */
3230 socket_unlock(so, 1);
3231 error = EINVAL;
3232 goto out;
3233 }
3234
3235 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
3236 socket_unlock(so, 1);
3237 error = ENOTCONN;
3238 goto out;
3239 }
3240 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3241 if (error) {
3242 socket_unlock(so, 1);
3243 goto out;
3244 }
3245 sa = 0;
3246 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
3247 if (error == 0) {
3248 error = sflt_getpeername(so, &sa);
3249 if (error == EJUSTRETURN) {
3250 error = 0;
3251 }
3252 }
3253 socket_unlock(so, 1);
3254 if (error) {
3255 goto bad;
3256 }
3257 if (sa == 0) {
3258 len = 0;
3259 goto gotnothing;
3260 }
3261 sa_len = sa->sa_len;
3262 len = MIN(len, sa_len);
3263 error = copyout(__SA_UTILS_CONV_TO_BYTES(sa), uap->asa, len);
3264 if (error) {
3265 goto bad;
3266 }
3267 /* return the actual, untruncated address length */
3268 len = sa_len;
3269 gotnothing:
3270 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3271 bad:
3272 free_sockaddr(sa);
3273 out:
3274 file_drop(uap->fdes);
3275 return error;
3276 }
3277
3278 int
sockargs(struct mbuf ** mp,user_addr_t data,socklen_t buflen,int type)3279 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
3280 {
3281 sockaddr_ref_t sa;
3282 struct mbuf *m;
3283 int error;
3284 socklen_t alloc_buflen = buflen;
3285
3286 if (buflen > INT_MAX / 2) {
3287 return EINVAL;
3288 }
3289 if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
3290 buflen < offsetof(struct sockaddr, sa_data[0]))) {
3291 return EINVAL;
3292 }
3293 if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
3294 return EINVAL;
3295 }
3296
3297 #ifdef __LP64__
3298 /*
3299 * The fd's in the buffer must expand to be pointers, thus we need twice
3300 * as much space
3301 */
3302 if (type == MT_CONTROL) {
3303 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3304 sizeof(struct cmsghdr);
3305 }
3306 #endif
3307 if (alloc_buflen > MLEN) {
3308 if (type == MT_SONAME && alloc_buflen <= 112) {
3309 alloc_buflen = MLEN; /* unix domain compat. hack */
3310 } else if (alloc_buflen > MCLBYTES) {
3311 return EINVAL;
3312 }
3313 }
3314 m = m_get(M_WAIT, type);
3315 if (m == NULL) {
3316 return ENOBUFS;
3317 }
3318 if (alloc_buflen > MLEN) {
3319 MCLGET(m, M_WAIT);
3320 if ((m->m_flags & M_EXT) == 0) {
3321 m_free(m);
3322 return ENOBUFS;
3323 }
3324 }
3325 /*
3326 * K64: We still copyin the original buflen because it gets expanded
3327 * later and we lie about the size of the mbuf because it only affects
3328 * unp_* functions
3329 */
3330 m->m_len = buflen;
3331 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
3332 if (error) {
3333 (void) m_free(m);
3334 } else {
3335 *mp = m;
3336 if (type == MT_SONAME) {
3337 VERIFY(buflen <= SOCK_MAXADDRLEN);
3338 sa = mtod(m, sockaddr_ref_t);
3339 sa->sa_len = (__uint8_t)buflen;
3340 }
3341 }
3342 return error;
3343 }
3344
3345 /*
3346 * Given a user_addr_t of length len, allocate and fill out a *sa.
3347 *
3348 * Returns: 0 Success
3349 * ENAMETOOLONG Filename too long
3350 * EINVAL Invalid argument
3351 * ENOMEM Not enough space
3352 * copyin:EFAULT Bad address
3353 */
3354 static int
getsockaddr(struct socket * so,sockaddr_ref_ref_t namp,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3355 getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
3356 size_t len, boolean_t translate_unspec)
3357 {
3358 struct sockaddr *sa;
3359 int error;
3360
3361 if (len > SOCK_MAXADDRLEN) {
3362 return ENAMETOOLONG;
3363 }
3364
3365 if (len < offsetof(struct sockaddr, sa_data[0])) {
3366 return EINVAL;
3367 }
3368
3369 sa = alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL);
3370
3371 error = copyin(uaddr, (caddr_t)sa, len);
3372 if (error) {
3373 free_sockaddr(sa);
3374 } else {
3375 /*
3376 * Force sa_family to AF_INET on AF_INET sockets to handle
3377 * legacy applications that use AF_UNSPEC (0). On all other
3378 * sockets we leave it unchanged and let the lower layer
3379 * handle it.
3380 */
3381 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
3382 SOCK_CHECK_DOM(so, PF_INET) &&
3383 len == sizeof(struct sockaddr_in)) {
3384 sa->sa_family = AF_INET;
3385 }
3386 VERIFY(len <= SOCK_MAXADDRLEN);
3387 sa = *&sa;
3388 sa->sa_len = (__uint8_t)len;
3389 *namp = sa;
3390 }
3391 return error;
3392 }
3393
3394 static int
getsockaddr_s(struct socket * so,sockaddr_storage_ref_t ss,user_addr_t uaddr,size_t len,boolean_t translate_unspec)3395 getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
3396 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
3397 {
3398 int error;
3399
3400 if (ss == NULL || uaddr == USER_ADDR_NULL ||
3401 len < offsetof(struct sockaddr, sa_data[0])) {
3402 return EINVAL;
3403 }
3404
3405 /*
3406 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
3407 * so the check here is inclusive.
3408 */
3409 if (len > sizeof(*ss)) {
3410 return ENAMETOOLONG;
3411 }
3412
3413 bzero(ss, sizeof(*ss));
3414 error = copyin(uaddr, __SA_UTILS_CONV_TO_BYTES(ss), len);
3415 if (error == 0) {
3416 /*
3417 * Force sa_family to AF_INET on AF_INET sockets to handle
3418 * legacy applications that use AF_UNSPEC (0). On all other
3419 * sockets we leave it unchanged and let the lower layer
3420 * handle it.
3421 */
3422 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
3423 SOCK_CHECK_DOM(so, PF_INET) &&
3424 len == sizeof(struct sockaddr_in)) {
3425 ss->ss_family = AF_INET;
3426 }
3427
3428 ss->ss_len = (__uint8_t)len;
3429 }
3430 return error;
3431 }
3432
3433 int
internalize_recv_msghdr_array(const void_ptr_t src,int spacetype,int direction,u_int count,user_msghdr_x_ptr_t dst,recv_msg_elem_ptr_t recv_msg_array)3434 internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3435 u_int count, user_msghdr_x_ptr_t dst,
3436 recv_msg_elem_ptr_t recv_msg_array)
3437 {
3438 int error = 0;
3439 u_int i;
3440
3441 for (i = 0; i < count; i++) {
3442 struct user_iovec *iovp;
3443 struct user_msghdr_x *user_msg = dst + i;
3444 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3445
3446 if (spacetype == UIO_USERSPACE64) {
3447 const struct user64_msghdr_x *msghdr64;
3448
3449 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3450
3451 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3452 user_msg->msg_namelen = msghdr64->msg_namelen;
3453 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3454 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3455 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3456 user_msg->msg_controllen = msghdr64->msg_controllen;
3457 user_msg->msg_flags = msghdr64->msg_flags;
3458 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3459 } else {
3460 const struct user32_msghdr_x *msghdr32;
3461
3462 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3463
3464 user_msg->msg_name = msghdr32->msg_name;
3465 user_msg->msg_namelen = msghdr32->msg_namelen;
3466 user_msg->msg_iov = msghdr32->msg_iov;
3467 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3468 user_msg->msg_control = msghdr32->msg_control;
3469 user_msg->msg_controllen = msghdr32->msg_controllen;
3470 user_msg->msg_flags = msghdr32->msg_flags;
3471 user_msg->msg_datalen = msghdr32->msg_datalen;
3472 }
3473
3474 if (user_msg->msg_iovlen <= 0 ||
3475 user_msg->msg_iovlen > UIO_MAXIOV) {
3476 error = EMSGSIZE;
3477 goto done;
3478 }
3479 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3480 spacetype, direction);
3481 if (recv_msg_elem->uio == NULL) {
3482 error = ENOMEM;
3483 goto done;
3484 }
3485
3486 iovp = uio_iovsaddr_user(recv_msg_elem->uio);
3487 if (iovp == NULL) {
3488 error = ENOMEM;
3489 goto done;
3490 }
3491 error = copyin_user_iovec_array(user_msg->msg_iov,
3492 spacetype, user_msg->msg_iovlen, iovp);
3493 if (error) {
3494 goto done;
3495 }
3496 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3497
3498 error = uio_calculateresid_user(recv_msg_elem->uio);
3499 if (error) {
3500 goto done;
3501 }
3502 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3503
3504 if (user_msg->msg_name && user_msg->msg_namelen) {
3505 recv_msg_elem->which |= SOCK_MSG_SA;
3506 }
3507 if (user_msg->msg_control && user_msg->msg_controllen) {
3508 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3509 }
3510 }
3511 done:
3512
3513 return error;
3514 }
3515
3516 u_int
externalize_recv_msghdr_array(proc_ref_t p,socket_ref_t so,void_ptr_t dst,u_int count,user_msghdr_x_ptr_t src,recv_msg_elem_ptr_t recv_msg_array,int_ref_t ret_error)3517 externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3518 u_int count, user_msghdr_x_ptr_t src,
3519 recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3520 {
3521 u_int i;
3522 u_int retcnt = 0;
3523 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3524
3525 *ret_error = 0;
3526
3527 for (i = 0; i < count; i++) {
3528 struct user_msghdr_x *user_msg = src + i;
3529 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3530 user_ssize_t len = 0;
3531 int error;
3532
3533 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3534
3535 if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3536 retcnt++;
3537
3538 if (recv_msg_elem->which & SOCK_MSG_SA) {
3539 error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3540 &user_msg->msg_namelen);
3541 if (error != 0) {
3542 *ret_error = error;
3543 return 0;
3544 }
3545 }
3546 if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3547 error = copyout_control(p, recv_msg_elem->controlp,
3548 user_msg->msg_control, &user_msg->msg_controllen,
3549 &recv_msg_elem->flags, so);
3550 if (error != 0) {
3551 *ret_error = error;
3552 return 0;
3553 }
3554 }
3555 }
3556
3557 if (spacetype == UIO_USERSPACE64) {
3558 struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3559
3560 msghdr64->msg_namelen = user_msg->msg_namelen;
3561 msghdr64->msg_controllen = user_msg->msg_controllen;
3562 msghdr64->msg_flags = recv_msg_elem->flags;
3563 msghdr64->msg_datalen = len;
3564 } else {
3565 struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3566
3567 msghdr32->msg_namelen = user_msg->msg_namelen;
3568 msghdr32->msg_controllen = user_msg->msg_controllen;
3569 msghdr32->msg_flags = recv_msg_elem->flags;
3570 msghdr32->msg_datalen = (user32_size_t)len;
3571 }
3572 }
3573 return retcnt;
3574 }
3575
3576 recv_msg_elem_ptr_t
alloc_recv_msg_array(u_int count)3577 alloc_recv_msg_array(u_int count)
3578 {
3579 return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3580 }
3581
3582 void
free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array,u_int count)3583 free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3584 {
3585 if (recv_msg_array == NULL) {
3586 return;
3587 }
3588 for (uint32_t i = 0; i < count; i++) {
3589 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3590
3591 if (recv_msg_elem->uio != NULL) {
3592 uio_free(recv_msg_elem->uio);
3593 }
3594 free_sockaddr(recv_msg_elem->psa);
3595 if (recv_msg_elem->controlp != NULL) {
3596 m_freem(recv_msg_elem->controlp);
3597 }
3598 }
3599 kfree_type(struct recv_msg_elem, count, recv_msg_array);
3600 }
3601
3602
3603 /* Extern linkage requires using __counted_by instead of bptr */
3604 __private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem * __counted_by (count)recv_msg_array,u_int count)3605 recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3606 {
3607 user_ssize_t len = 0;
3608 u_int i;
3609
3610 for (i = 0; i < count; i++) {
3611 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3612
3613 if (recv_msg_elem->uio != NULL) {
3614 len += uio_resid(recv_msg_elem->uio);
3615 }
3616 }
3617 return len;
3618 }
3619
3620 int
recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array,u_int count)3621 recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3622 {
3623 user_ssize_t len = 0;
3624 u_int i;
3625
3626 for (i = 0; i < count; i++) {
3627 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3628
3629 if (recv_msg_elem->uio != NULL) {
3630 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3631
3632 /*
3633 * Sanity check on the validity of the iovec:
3634 * no point of going over sb_max
3635 */
3636 if (resid < 0 || (u_int32_t)resid > sb_max) {
3637 return 0;
3638 }
3639
3640 len += resid;
3641 if (len < 0 || (u_int32_t)len > sb_max) {
3642 return 0;
3643 }
3644 }
3645 }
3646 return 1;
3647 }
3648
3649 #if SENDFILE
3650
/*
 * Number of iovec slots in the on-stack uio used by sendfile(); also the
 * factor from which the per-pass byte limit below is derived.
 */
#define SFUIOBUFS 64

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Each rounds up: ((n - 1) >> shift) + 1.  'n' is converted to unsigned
 * int first, so it must be non-zero; for n == 0 the subtraction wraps
 * around and the result is meaningless.
 */
#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3663
3664 static void
alloc_sendpkt(int how,size_t pktlen,unsigned int * maxchunks,mbuf_ref_ref_t m,boolean_t jumbocl)3665 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3666 mbuf_ref_ref_t m, boolean_t jumbocl)
3667 {
3668 unsigned int needed;
3669
3670 if (pktlen == 0) {
3671 panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3672 }
3673
3674 /*
3675 * Try to allocate for the whole thing. Since we want full control
3676 * over the buffer size and be able to accept partial result, we can't
3677 * use mbuf_allocpacket(). The logic below is similar to sosend().
3678 */
3679 *m = NULL;
3680 if (pktlen > MBIGCLBYTES && jumbocl) {
3681 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3682 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3683 }
3684 if (*m == NULL) {
3685 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3686 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3687 }
3688
3689 /*
3690 * Our previous attempt(s) at allocation had failed; the system
3691 * may be short on mbufs, and we want to block until they are
3692 * available. This time, ask just for 1 mbuf and don't return
3693 * until we get it.
3694 */
3695 if (*m == NULL) {
3696 needed = 1;
3697 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3698 }
3699 if (*m == NULL) {
3700 panic("%s: blocking allocation returned NULL", __func__);
3701 }
3702
3703 *maxchunks = needed;
3704 }
3705
3706 /*
3707 * sendfile(2).
3708 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3709 * struct sf_hdtr *hdtr, int flags)
3710 *
3711 * Send a file specified by 'fd' and starting at 'offset' to a socket
3712 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3713 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3714 * output. If specified, write the total number of bytes sent into *nbytes.
3715 */
3716 int
sendfile(proc_ref_t p,struct sendfile_args * uap,__unused int * retval)3717 sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
3718 {
3719 fileproc_ref_t fp;
3720 vnode_ref_t vp;
3721 socket_ref_t so;
3722 struct writev_nocancel_args nuap;
3723 user_ssize_t writev_retval;
3724 struct user_sf_hdtr user_hdtr;
3725 struct user32_sf_hdtr user32_hdtr;
3726 struct user64_sf_hdtr user64_hdtr;
3727 off_t off, xfsize;
3728 off_t nbytes = 0, sbytes = 0;
3729 int error = 0;
3730 size_t sizeof_hdtr;
3731 off_t file_size;
3732 struct vfs_context context = *vfs_context_current();
3733 bool got_vnode_ref = false;
3734
3735 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3736
3737 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3738 0, 0, 0, 0);
3739
3740 AUDIT_ARG(fd, uap->fd);
3741 AUDIT_ARG(value32, uap->s);
3742
3743 /*
3744 * Do argument checking. Must be a regular file in, stream
3745 * type and connected socket out, positive offset.
3746 */
3747 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3748 goto done;
3749 }
3750 if ((error = vnode_getwithref(vp))) {
3751 goto done;
3752 }
3753 got_vnode_ref = true;
3754
3755 if ((fp->f_flag & FREAD) == 0) {
3756 error = EBADF;
3757 goto done1;
3758 }
3759 if (vnode_isreg(vp) == 0) {
3760 error = ENOTSUP;
3761 goto done1;
3762 }
3763 error = file_socket(uap->s, &so);
3764 if (error) {
3765 goto done1;
3766 }
3767 if (so == NULL) {
3768 error = EBADF;
3769 goto done2;
3770 }
3771 if (so->so_type != SOCK_STREAM) {
3772 error = EINVAL;
3773 goto done2;
3774 }
3775 if ((so->so_state & SS_ISCONNECTED) == 0) {
3776 error = ENOTCONN;
3777 goto done2;
3778 }
3779 if (uap->offset < 0) {
3780 error = EINVAL;
3781 goto done2;
3782 }
3783 if (uap->nbytes == USER_ADDR_NULL) {
3784 error = EINVAL;
3785 goto done2;
3786 }
3787 if (uap->flags != 0) {
3788 error = EINVAL;
3789 goto done2;
3790 }
3791
3792 context.vc_ucred = fp->fp_glob->fg_cred;
3793
3794 #if CONFIG_MACF_SOCKET_SUBSET
3795 /* JMM - fetch connected sockaddr? */
3796 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3797 if (error) {
3798 goto done2;
3799 }
3800 #endif
3801
3802 /*
3803 * Get number of bytes to send
3804 * Should it applies to size of header and trailer?
3805 */
3806 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3807 if (error) {
3808 goto done2;
3809 }
3810
3811 /*
3812 * If specified, get the pointer to the sf_hdtr struct for
3813 * any headers/trailers.
3814 */
3815 if (uap->hdtr != USER_ADDR_NULL) {
3816 caddr_t hdtrp;
3817
3818 bzero(&user_hdtr, sizeof(user_hdtr));
3819 if (is_p_64bit_process) {
3820 hdtrp = (caddr_t)&user64_hdtr;
3821 sizeof_hdtr = sizeof(user64_hdtr);
3822 } else {
3823 hdtrp = (caddr_t)&user32_hdtr;
3824 sizeof_hdtr = sizeof(user32_hdtr);
3825 }
3826 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3827 if (error) {
3828 goto done2;
3829 }
3830 if (is_p_64bit_process) {
3831 user_hdtr.headers = user64_hdtr.headers;
3832 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3833 user_hdtr.trailers = user64_hdtr.trailers;
3834 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3835 } else {
3836 user_hdtr.headers = user32_hdtr.headers;
3837 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3838 user_hdtr.trailers = user32_hdtr.trailers;
3839 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3840 }
3841
3842 /*
3843 * Send any headers. Wimp out and use writev(2).
3844 */
3845 if (user_hdtr.headers != USER_ADDR_NULL) {
3846 bzero(&nuap, sizeof(struct writev_args));
3847 nuap.fd = uap->s;
3848 nuap.iovp = user_hdtr.headers;
3849 nuap.iovcnt = user_hdtr.hdr_cnt;
3850 error = writev_nocancel(p, &nuap, &writev_retval);
3851 if (error) {
3852 goto done2;
3853 }
3854 sbytes += writev_retval;
3855 }
3856 }
3857
3858 /*
3859 * Get the file size for 2 reasons:
3860 * 1. We don't want to allocate more mbufs than necessary
3861 * 2. We don't want to read past the end of file
3862 */
3863 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3864 goto done2;
3865 }
3866
3867 /*
3868 * Simply read file data into a chain of mbufs that used with scatter
3869 * gather reads. We're not (yet?) setup to use zero copy external
3870 * mbufs that point to the file pages.
3871 */
3872 socket_lock(so, 1);
3873 error = sblock(&so->so_snd, SBL_WAIT);
3874 if (error) {
3875 socket_unlock(so, 1);
3876 goto done2;
3877 }
3878 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3879 mbuf_ref_t m0 = NULL;
3880 mbuf_t m;
3881 unsigned int nbufs = SFUIOBUFS, i;
3882 uio_t auio;
3883 UIO_STACKBUF(uio_buf, SFUIOBUFS); /* 1KB !!! */
3884 size_t uiolen;
3885 user_ssize_t rlen;
3886 off_t pgoff;
3887 size_t pktlen;
3888 boolean_t jumbocl;
3889
3890 /*
3891 * Calculate the amount to transfer.
3892 * Align to round number of pages.
3893 * Not to exceed send socket buffer,
3894 * the EOF, or the passed in nbytes.
3895 */
3896 xfsize = sbspace(&so->so_snd);
3897
3898 if (xfsize <= 0) {
3899 if (so->so_state & SS_CANTSENDMORE) {
3900 error = EPIPE;
3901 goto done3;
3902 } else if ((so->so_state & SS_NBIO)) {
3903 error = EAGAIN;
3904 goto done3;
3905 } else {
3906 xfsize = PAGE_SIZE;
3907 }
3908 }
3909
3910 if (xfsize > SENDFILE_MAX_BYTES) {
3911 xfsize = SENDFILE_MAX_BYTES;
3912 } else if (xfsize > PAGE_SIZE) {
3913 xfsize = trunc_page(xfsize);
3914 }
3915 pgoff = off & PAGE_MASK_64;
3916 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3917 xfsize = PAGE_SIZE_64 - pgoff;
3918 }
3919 if (nbytes && xfsize > (nbytes - sbytes)) {
3920 xfsize = nbytes - sbytes;
3921 }
3922 if (xfsize <= 0) {
3923 break;
3924 }
3925 if (off + xfsize > file_size) {
3926 xfsize = file_size - off;
3927 }
3928 if (xfsize <= 0) {
3929 break;
3930 }
3931
3932 /*
3933 * Attempt to use larger than system page-size clusters for
3934 * large writes only if there is a jumbo cluster pool and
3935 * if the socket is marked accordingly.
3936 */
3937 jumbocl = sosendjcl && njcl > 0 &&
3938 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3939
3940 socket_unlock(so, 0);
3941 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3942 pktlen = mbuf_pkthdr_maxlen(m0);
3943 if (pktlen < (size_t)xfsize) {
3944 xfsize = pktlen;
3945 }
3946
3947 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3948 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3949 if (auio == NULL) {
3950 DBG_PRINTF("sendfile failed. nbufs = %d. %s", nbufs,
3951 "File a radar related to rdar://10146739.\n");
3952 mbuf_freem(m0);
3953 error = ENXIO;
3954 socket_lock(so, 0);
3955 goto done3;
3956 }
3957
3958 for (i = 0, m = m0, uiolen = 0;
3959 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3960 i++, m = mbuf_next(m)) {
3961 size_t mlen = mbuf_maxlen(m);
3962
3963 if (mlen + uiolen > (size_t)xfsize) {
3964 mlen = xfsize - uiolen;
3965 }
3966 mbuf_setlen(m, mlen);
3967 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3968 mlen);
3969 uiolen += mlen;
3970 }
3971
3972 if (xfsize != uio_resid(auio)) {
3973 DBG_PRINTF("sendfile: xfsize: %lld != uio_resid(auio): "
3974 "%lld\n", xfsize, (long long)uio_resid(auio));
3975 }
3976
3977 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3978 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3979 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3980 error = fo_read(fp, auio, FOF_OFFSET, &context);
3981 socket_lock(so, 0);
3982 if (error != 0) {
3983 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3984 error == EINTR || error == EWOULDBLOCK)) {
3985 error = 0;
3986 } else {
3987 mbuf_freem(m0);
3988 goto done3;
3989 }
3990 }
3991 xfsize -= uio_resid(auio);
3992 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3993 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3994 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3995
3996 if (xfsize == 0) {
3997 break;
3998 }
3999 if (xfsize + off > file_size) {
4000 DBG_PRINTF("sendfile: xfsize: %lld + off: %lld > file_size:"
4001 "%lld\n", xfsize, off, file_size);
4002 }
4003 for (i = 0, m = m0, rlen = 0;
4004 i < nbufs && m != NULL && rlen < xfsize;
4005 i++, m = mbuf_next(m)) {
4006 size_t mlen = mbuf_maxlen(m);
4007
4008 if (rlen + mlen > (size_t)xfsize) {
4009 mlen = xfsize - rlen;
4010 }
4011 mbuf_setlen(m, mlen);
4012
4013 rlen += mlen;
4014 }
4015 mbuf_pkthdr_setlen(m0, xfsize);
4016
4017 retry_space:
4018 /*
4019 * Make sure that the socket is still able to take more data.
4020 * CANTSENDMORE being true usually means that the connection
4021 * was closed. so_error is true when an error was sensed after
4022 * a previous send.
4023 * The state is checked after the page mapping and buffer
4024 * allocation above since those operations may block and make
4025 * any socket checks stale. From this point forward, nothing
4026 * blocks before the pru_send (or more accurately, any blocking
4027 * results in a loop back to here to re-check).
4028 */
4029 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
4030 if (so->so_state & SS_CANTSENDMORE) {
4031 error = EPIPE;
4032 } else {
4033 error = so->so_error;
4034 so->so_error = 0;
4035 }
4036 m_freem(m0);
4037 goto done3;
4038 }
4039 /*
4040 * Wait for socket space to become available. We do this just
4041 * after checking the connection state above in order to avoid
4042 * a race condition with sbwait().
4043 */
4044 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
4045 if (so->so_state & SS_NBIO) {
4046 m_freem(m0);
4047 error = EAGAIN;
4048 goto done3;
4049 }
4050 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4051 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
4052 error = sbwait(&so->so_snd);
4053 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4054 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
4055 /*
4056 * An error from sbwait usually indicates that we've
4057 * been interrupted by a signal. If we've sent anything
4058 * then return bytes sent, otherwise return the error.
4059 */
4060 if (error) {
4061 m_freem(m0);
4062 goto done3;
4063 }
4064 goto retry_space;
4065 }
4066
4067 mbuf_ref_t control = NULL;
4068 {
4069 /*
4070 * Socket filter processing
4071 */
4072
4073 error = sflt_data_out(so, NULL, &m0, &control, 0);
4074 if (error) {
4075 if (error == EJUSTRETURN) {
4076 error = 0;
4077 continue;
4078 }
4079 goto done3;
4080 }
4081 /*
4082 * End Socket filter processing
4083 */
4084 }
4085 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4086 uap->s, 0, 0, 0, 0);
4087 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
4088 NULL, control, p);
4089 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4090 uap->s, 0, 0, 0, 0);
4091 if (error) {
4092 goto done3;
4093 }
4094 }
4095 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
4096 /*
4097 * Send trailers. Wimp out and use writev(2).
4098 */
4099 if (uap->hdtr != USER_ADDR_NULL &&
4100 user_hdtr.trailers != USER_ADDR_NULL) {
4101 bzero(&nuap, sizeof(struct writev_args));
4102 nuap.fd = uap->s;
4103 nuap.iovp = user_hdtr.trailers;
4104 nuap.iovcnt = user_hdtr.trl_cnt;
4105 error = writev_nocancel(p, &nuap, &writev_retval);
4106 if (error) {
4107 goto done2;
4108 }
4109 sbytes += writev_retval;
4110 }
4111 done2:
4112 file_drop(uap->s);
4113 done1:
4114 file_drop(uap->fd);
4115 done:
4116 if (got_vnode_ref) {
4117 vnode_put(vp);
4118 }
4119 if (uap->nbytes != USER_ADDR_NULL) {
4120 /* XXX this appears bogus for some early failure conditions */
4121 copyout(&sbytes, uap->nbytes, sizeof(off_t));
4122 }
4123 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
4124 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
4125 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
4126 return error;
4127 done3:
4128 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
4129 goto done2;
4130 }
4131
4132
4133 #endif /* SENDFILE */
4134