1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129
130 #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
131 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
132 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
133 void ipc_port_release_send(ipc_port_t);
134
135 void fileport_releasefg(struct fileglob *fg);
136
137 /* flags for fp_close_and_unlock */
138 #define FD_DUP2RESV 1
139
140 /* We don't want these exported */
141
142 __private_extern__
143 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
144
145 /* Conflict wait queue for when selects collide (opaque type) */
146 extern struct waitq select_conflict_queue;
147
148 #define f_flag fp_glob->fg_flag
149 #define f_type fp_glob->fg_ops->fo_type
150 #define f_cred fp_glob->fg_cred
151 #define f_ops fp_glob->fg_ops
152 #define f_offset fp_glob->fg_offset
153
154 static SECURITY_READ_ONLY_LATE(zone_t) fp_zone;
155 ZONE_INIT(&fp_zone, "fileproc", sizeof(struct fileproc),
156 ZC_ZFREE_CLEARMEM, ZONE_ID_FILEPROC, NULL);
157
158 ZONE_DECLARE(fg_zone, "fileglob", sizeof(struct fileglob), ZC_ZFREE_CLEARMEM);
159 /*
160 * If you need accounting for KM_OFILETABL consider using
161 * KALLOC_HEAP_DEFINE to define a view.
162 */
163 #define KM_OFILETABL KHEAP_DEFAULT
164
165 /*
166 * Descriptor management.
167 */
168 int nfiles; /* actual number of open files */
169 /*
170 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
171 */
172 static const struct fileops uninitops;
173
174 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
175 static LCK_GRP_DECLARE(file_lck_grp, "file");
176
177
178 #pragma mark fileglobs
179
180 /*!
181 * @function fg_free
182 *
183 * @brief
184 * Free a file structure.
185 */
static void
fg_free(struct fileglob *fg)
{
	/* one fewer open file system-wide */
	os_atomic_dec(&nfiles, relaxed);

	if (fg->fg_vn_data) {
		fg_vn_data_free(fg->fg_vn_data);
		fg->fg_vn_data = NULL;
	}

	/* drop the credential reference stashed at open time */
	kauth_cred_t cred = fg->fg_cred;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_unref(&cred);
		fg->fg_cred = NOCRED;
	}
	lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);

#if CONFIG_MACF && CONFIG_VNGUARD
	/* tear down the vnode-guard MAC label, if any */
	vng_file_label_destroy(fg);
#endif
	zfree(fg_zone, fg);
}
208
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
	/*
	 * Take an additional reference on fg.  The caller must hold p's
	 * fdlock (asserted on DEBUG/DEVELOPMENT kernels only).
	 */
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p;
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
220
/*
 * Drop a fileglob reference that is known not to be the last one
 * (the "live" release variant asserts the count stays above zero).
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
226
/*
 * fg_drop
 *
 * Drop one reference on `fg`.  Before the count is decremented, any
 * POSIX advisory locks held by `p` on a vnode-backed fg are released.
 * When the last reference goes away, fo_close() is invoked and the
 * fileglob is freed.
 *
 * Returns: 0, or the error from vnode_getwithref()/fo_close().
 */
int
fg_drop(proc_t p, struct fileglob *fg)
{
	struct vnode *vp;
	struct vfs_context context;
	int error = 0;

	if (fg == NULL) {
		return 0;
	}

	/* Set up context with cred stashed in fg */
	if (p == current_proc()) {
		context.vc_thread = current_thread();
	} else {
		context.vc_thread = NULL;
	}
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process. This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
	    (p->p_ladvflag & P_LADVLOCK)) {
		struct flock lf = {
			.l_whence = SEEK_SET,
			.l_type = F_UNLCK,
		};

		vp = (struct vnode *)fg_get_data(fg);
		if ((error = vnode_getwithref(vp)) == 0) {
			/* best effort: errors from the unlock itself are ignored */
			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
			(void)vnode_put(vp);
		}
	}

	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
		/*
		 * Since we ensure that fg->fg_ops is always initialized,
		 * it is safe to invoke fo_close on the fg
		 */
		error = fo_close(fg, &context);

		fg_free(fg);
	}

	return error;
}
280
/*
 * Store `fg_data` into fg->fg_data.  On ptrauth hardware the pointer
 * is signed with a discriminator blended from the storage address and
 * the descriptor type, so a value copied into a different fileglob
 * (or reinterpreted under another type) fails authentication later in
 * fg_get_data_volatile().
 */
inline
void
fg_set_data(
	struct fileglob *fg,
	void *fg_data)
{
	uintptr_t *store = &fg->fg_data;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	/* NULL is stored unsigned so emptiness checks stay cheap */
	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_sign_unauthenticated(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	*store = (uintptr_t)fg_data;
}
302
/*
 * Load fg->fg_data, authenticating the ptrauth signature applied by
 * fg_set_data() when pointer authentication is available.  A forged
 * or mismatched pointer traps at auth time rather than being used.
 */
inline
void *
fg_get_data_volatile(struct fileglob *fg)
{
	uintptr_t *store = &fg->fg_data;
	void *fg_data = (void *)*store;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	/* NULL was stored unsigned; only authenticate real pointers */
	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_auth_data(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	return fg_data;
}
323
324
325 bool
fg_sendable(struct fileglob * fg)326 fg_sendable(struct fileglob *fg)
327 {
328 switch (FILEGLOB_DTYPE(fg)) {
329 case DTYPE_VNODE:
330 case DTYPE_SOCKET:
331 case DTYPE_PIPE:
332 case DTYPE_PSXSHM:
333 case DTYPE_NETPOLICY:
334 return (fg->fg_lflags & FG_CONFINED) == 0;
335
336 default:
337 return false;
338 }
339 }
340
341 #pragma mark file descriptor table (static helpers)
342
343 static void
procfdtbl_reservefd(struct proc * p,int fd)344 procfdtbl_reservefd(struct proc * p, int fd)
345 {
346 p->p_fd.fd_ofiles[fd] = NULL;
347 p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
348 }
349
350 void
procfdtbl_releasefd(struct proc * p,int fd,struct fileproc * fp)351 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
352 {
353 if (fp != NULL) {
354 p->p_fd.fd_ofiles[fd] = fp;
355 }
356 p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
357 if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
358 p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
359 wakeup(&p->p_fd);
360 }
361 }
362
/*
 * Sleep until reserved slot `fd` is released.  msleep() drops the
 * proc_fdlock while blocked and reacquires it before returning, so
 * callers must revalidate table state afterwards.
 */
static void
procfdtbl_waitfd(struct proc * p, int fd)
{
	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
}
369
370 static void
procfdtbl_clearfd(struct proc * p,int fd)371 procfdtbl_clearfd(struct proc * p, int fd)
372 {
373 int waiting;
374
375 waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
376 p->p_fd.fd_ofiles[fd] = NULL;
377 p->p_fd.fd_ofileflags[fd] = 0;
378 if (waiting == UF_RESVWAIT) {
379 wakeup(&p->p_fd);
380 }
381 }
382
383 /*
384 * fdrelse
385 *
386 * Description: Inline utility function to free an fd in a filedesc
387 *
 * Parameters:	p		Process whose filedesc the fd lies in
 *		fd		fd to free
391 *
392 * Returns: void
393 *
394 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
395 * the caller
396 */
397 void
fdrelse(struct proc * p,int fd)398 fdrelse(struct proc * p, int fd)
399 {
400 struct filedesc *fdp = &p->p_fd;
401 int nfd = 0;
402
403 if (fd < fdp->fd_freefile) {
404 fdp->fd_freefile = fd;
405 }
406 #if DIAGNOSTIC
407 if (fd >= fdp->fd_afterlast) {
408 panic("fdrelse: fd_afterlast inconsistent");
409 }
410 #endif
411 procfdtbl_clearfd(p, fd);
412
413 nfd = fdp->fd_afterlast;
414 while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
415 !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
416 nfd--;
417 }
418 fdp->fd_afterlast = nfd;
419
420 #if CONFIG_PROC_RESOURCE_LIMITS
421 fdp->fd_nfiles_open--;
422 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
423 }
424
425
426 /*
427 * finishdup
428 *
429 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
430 *
431 * Parameters: p Process performing the dup
432 * old The fd to dup
433 * new The fd to dup it to
434 * fp_flags Flags to augment the new fp
435 * retval Pointer to the call return area
436 *
437 * Returns: 0 Success
438 * EBADF
439 * ENOMEM
440 *
441 * Implicit returns:
442 * *retval (modified) The new descriptor
443 *
444 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
445 * the caller
446 *
447 * Notes: This function may drop and reacquire this lock; it is unsafe
448 * for a caller to assume that other state protected by the lock
449 * has not been subsequently changed out from under it.
450 */
static int
finishdup(proc_t p, struct filedesc *fdp, int old, int new,
    fileproc_flags_t fp_flags, int32_t *retval)
{
	struct fileproc *nfp;
	struct fileproc *ofp;
#if CONFIG_MACF
	int error;
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	/* source fd must hold a settled (non-reserved) file */
	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
		fdrelse(p, new);
		return EBADF;
	}

#if CONFIG_MACF
	/* give MAC a chance to veto the dup; undo the reservation on denial */
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_dup(cred, ofp->fp_glob, new);
	kauth_cred_unref(&cred);

	if (error) {
		fdrelse(p, new);
		return error;
	}
#endif

	/* take the fileglob reference while the fdlock still pins ofp */
	fg_ref(p, ofp->fp_glob);

	/* drop the lock across the (possibly blocking) zone allocation */
	proc_fdunlock(p);

	nfp = fileproc_alloc_init();

	if (fp_flags) {
		nfp->fp_flags |= fp_flags;
	}
	nfp->fp_glob = ofp->fp_glob;

	proc_fdlock(p);

#if DIAGNOSTIC
	/* `new` was reserved by the caller; it must still be empty */
	if (fdp->fd_ofiles[new] != 0) {
		panic("finishdup: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("finishdup: unreserved fileflags with new %d", new);
	}
#endif

	if (new >= fdp->fd_afterlast) {
		fdp->fd_afterlast = new + 1;
	}
	/* install the new fileproc and release the reservation */
	procfdtbl_releasefd(p, new, nfp);
	*retval = new;
	return 0;
}
511
512
513 #pragma mark file descriptor table (exported functions)
514
/* Take the shared (read) lock protecting fd_cdir and fd_rdir. */
void
proc_dirs_lock_shared(proc_t p)
{
	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
}
520
/* Release the shared lock taken by proc_dirs_lock_shared(). */
void
proc_dirs_unlock_shared(proc_t p)
{
	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
}
526
/* Take the exclusive (write) lock protecting fd_cdir and fd_rdir. */
void
proc_dirs_lock_exclusive(proc_t p)
{
	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
}
532
/* Release the exclusive lock taken by proc_dirs_lock_exclusive(). */
void
proc_dirs_unlock_exclusive(proc_t p)
{
	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
}
538
539 /*
540 * proc_fdlock, proc_fdlock_spin
541 *
542 * Description: Lock to control access to the per process struct fileproc
543 * and struct filedesc
544 *
545 * Parameters: p Process to take the lock on
546 *
547 * Returns: void
548 *
549 * Notes: The lock is initialized in forkproc() and destroyed in
550 * reap_child_process().
551 */
void
proc_fdlock(proc_t p)
{
	/* blocking acquire of the per-process file table mutex */
	lck_mtx_lock(&p->p_fd.fd_lock);
}
557
void
proc_fdlock_spin(proc_t p)
{
	/* spin-mode acquire for short critical sections */
	lck_mtx_lock_spin(&p->p_fd.fd_lock);
}
563
void
proc_fdlock_assert(proc_t p, int assertflags)
{
	/* assert ownership state of the fd lock (LCK_MTX_ASSERT_*) */
	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
}
569
570
571 /*
572 * proc_fdunlock
573 *
574 * Description: Unlock the lock previously locked by a call to proc_fdlock()
575 *
576 * Parameters: p Process to drop the lock on
577 *
578 * Returns: void
579 */
void
proc_fdunlock(proc_t p)
{
	/* release the per-process file table mutex */
	lck_mtx_unlock(&p->p_fd.fd_lock);
}
585
/*
 * fdt_available_locked
 *
 * Return true if the process can still open at least `n` more file
 * descriptors: headroom between the current table size and the
 * RLIMIT_NOFILE limit is counted first, then empty unreserved slots
 * within the existing table.  Caller holds the proc_fdlock.
 */
bool
fdt_available_locked(proc_t p, int n)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **fpp;
	char *flags;
	int i;
	int lim = proc_limitgetcur_nofile(p);

	/* room the table could still grow by; may fully satisfy `n` */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
		return true;
	}
	/* otherwise count free slots starting at the lowest free index */
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
			return true;
		}
	}
	return false;
}
607
608
609 struct fdt_iterator
fdt_next(proc_t p,int fd,bool only_settled)610 fdt_next(proc_t p, int fd, bool only_settled)
611 {
612 struct fdt_iterator it;
613 struct filedesc *fdp = &p->p_fd;
614 struct fileproc *fp;
615 int nfds = fdp->fd_afterlast;
616
617 while (++fd < nfds) {
618 fp = fdp->fd_ofiles[fd];
619 if (fp == NULL || fp->fp_glob == NULL) {
620 continue;
621 }
622 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
623 continue;
624 }
625 it.fdti_fd = fd;
626 it.fdti_fp = fp;
627 return it;
628 }
629
630 it.fdti_fd = nfds;
631 it.fdti_fp = NULL;
632 return it;
633 }
634
635 struct fdt_iterator
fdt_prev(proc_t p,int fd,bool only_settled)636 fdt_prev(proc_t p, int fd, bool only_settled)
637 {
638 struct fdt_iterator it;
639 struct filedesc *fdp = &p->p_fd;
640 struct fileproc *fp;
641
642 while (--fd >= 0) {
643 fp = fdp->fd_ofiles[fd];
644 if (fp == NULL || fp->fp_glob == NULL) {
645 continue;
646 }
647 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
648 continue;
649 }
650 it.fdti_fd = fd;
651 it.fdti_fp = fp;
652 return it;
653 }
654
655 it.fdti_fd = -1;
656 it.fdti_fp = NULL;
657 return it;
658 }
659
/*
 * Initialize the four locks embedded in a freshly-created filedesc
 * (called from process creation; torn down by fdt_destroy()).
 */
void
fdt_init(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
}
670
/*
 * Destroy the filedesc locks set up by fdt_init(); the table contents
 * must already have been torn down (see fdt_invalidate()).
 */
void
fdt_destroy(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
}
681
/*
 * fdt_exec
 *
 * Prune the file table across exec/posix_spawn: unbind a workq-bound
 * servicing thread, drop all knotes, close every descriptor that must
 * not survive (FP_CLOEXEC; under POSIX_SPAWN_CLOEXEC_DEFAULT anything
 * not explicitly marked UF_INHERIT; anything MAC vetoes), and release
 * the per-process workqueue kqueue.
 */
void
fdt_exec(proc_t p, short posix_spawn_flags)
{
	struct filedesc *fdp = &p->p_fd;
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);
	struct kqworkq *dealloc_kqwq = NULL;

	/*
	 * If the current thread is bound as a workq/workloop
	 * servicing thread, we need to unbind it first.
	 */
	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
	}

	/*
	 * Deallocate the knotes for this process
	 * and mark the tables non-existent so
	 * subsequent kqueue closes go faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	proc_fdlock(p);

	/* walk top-down so fd_afterlast can shrink as fds close */
	for (int i = fdp->fd_afterlast; i-- > 0;) {
		struct fileproc *fp = fdp->fd_ofiles[i];
		char *flagp = &fdp->fd_ofileflags[i];
		bool inherit_file = true;

		if (fp == FILEPROC_NULL) {
			continue;
		}

		/*
		 * no file descriptor should be in flux when in exec,
		 * because we stopped all other threads
		 */
		if (*flagp & ~UF_INHERIT) {
			panic("file %d/%p in flux during exec of %p", i, fp, p);
		}

		if (fp->fp_flags & FP_CLOEXEC) {
			inherit_file = false;
		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
		    !(*flagp & UF_INHERIT)) {
			/*
			 * Reverse the usual semantics of file descriptor
			 * inheritance - all of them should be closed
			 * except files marked explicitly as "inherit" and
			 * not marked close-on-exec.
			 */
			inherit_file = false;
#if CONFIG_MACF
		} else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
			inherit_file = false;
#endif
		}

		*flagp = 0; /* clear UF_INHERIT */

		if (!inherit_file) {
			/* fp_close_and_unlock() drops the fdlock; retake it */
			fp_close_and_unlock(p, i, fp, 0);
			proc_fdlock(p);
		}
	}

	/* release the per-process workq kq */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* Anything to free? */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
}
764
765
/*
 * fdt_fork
 *
 * Clone `p`'s descriptor state into `newfdp` for fork(): inherit the
 * umask and flags, take fresh references on the root and working
 * directories (failure to re-reference a chroot root is fatal, since
 * it would let the child escape the chroot), then duplicate every
 * settled fileproc except those that are confined (FG_CONFINED),
 * marked FP_CLOFORK, or still reserved.  `uth_cdir`, when non-NULL,
 * is the calling thread's per-thread cwd and is inherited in
 * preference to the process cwd.
 *
 * Returns: 0 on success, EPERM (dead chroot), or ENOMEM.
 */
int
fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **ofiles;
	char *ofileflags;
	int n_files, afterlast, freefile;
	vnode_t v_dir;
#if CONFIG_PROC_RESOURCE_LIMITS
	int fd_nfiles_open = 0;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	proc_fdlock(p);

	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
	newfdp->fd_cmask = fdp->fd_cmask;
#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	/*
	 * For both fd_cdir and fd_rdir make sure we get
	 * a valid reference... if we can't, then set
	 * the pointer(s) to NULL in the child... this
	 * will keep us from using a non-referenced vp
	 * and allows us to do the vnode_rele only on
	 * a properly referenced vp
	 */
	if ((v_dir = fdp->fd_rdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_rdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_rdir == NULL) {
			/*
			 * We couldn't get a new reference on
			 * the chroot directory being
			 * inherited... this is fatal, since
			 * otherwise it would constitute an
			 * escape from a chroot environment by
			 * the new process.
			 */
			proc_fdunlock(p);
			return EPERM;
		}
	}

	/*
	 * If we are running with per-thread current working directories,
	 * inherit the new current working directory from the current thread.
	 */
	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_cdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
			/*
			 * we couldn't get a new reference on
			 * the current working directory being
			 * inherited... we might as well drop
			 * our reference from the parent also
			 * since the vnode has gone DEAD making
			 * it useless... by dropping it we'll
			 * be that much closer to recycling it
			 */
			vnode_rele(fdp->fd_cdir);
			fdp->fd_cdir = NULL;
		}
	}

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	afterlast = fdp->fd_afterlast;
	freefile = fdp->fd_freefile;
	if (afterlast <= NDFILE) {
		n_files = NDFILE;
	} else {
		n_files = roundup(afterlast, NDEXTENT);
	}

	/* drop the lock across the (possibly blocking) table allocation */
	proc_fdunlock(p);

	ofiles = kheap_alloc(KM_OFILETABL, n_files * OFILESIZE,
	    Z_WAITOK | Z_ZERO);
	if (ofiles == NULL) {
		/* undo the directory references taken above */
		if (newfdp->fd_cdir) {
			vnode_rele(newfdp->fd_cdir);
			newfdp->fd_cdir = NULL;
		}
		if (newfdp->fd_rdir) {
			vnode_rele(newfdp->fd_rdir);
			newfdp->fd_rdir = NULL;
		}
		return ENOMEM;
	}
	/* flags array lives directly after the fileproc pointers */
	ofileflags = (char *)&ofiles[n_files];

	proc_fdlock(p);

	for (int i = afterlast; i-- > 0;) {
		struct fileproc *ofp, *nfp;
		char flags;

		ofp = fdp->fd_ofiles[i];
		flags = fdp->fd_ofileflags[i];

		if (ofp == NULL ||
		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
		    (ofp->fp_flags & FP_CLOFORK) ||
		    (flags & UF_RESERVED)) {
			/* skipped fds also tighten the child's table bounds */
			if (i + 1 == afterlast) {
				afterlast = i;
			}
			if (i < freefile) {
				freefile = i;
			}

			continue;
		}

		/* guarded fds are FP_CLOFORK and were skipped above */
		assert(ofp->fp_guard_attrs == 0);
		nfp = fileproc_alloc_init();
		nfp->fp_glob = ofp->fp_glob;
		nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
		fg_ref(p, nfp->fp_glob);

		ofiles[i] = nfp;
#if CONFIG_PROC_RESOURCE_LIMITS
		fd_nfiles_open++;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	}

	proc_fdunlock(p);

	newfdp->fd_ofiles = ofiles;
	newfdp->fd_ofileflags = ofileflags;
	newfdp->fd_nfiles = n_files;
	newfdp->fd_afterlast = afterlast;
	newfdp->fd_freefile = freefile;

#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_open = fd_nfiles_open;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	return 0;
}
921
/*
 * fdt_invalidate
 *
 * Tear down the file table at process exit: drop all knotes and
 * workloops, close every open descriptor, then detach and free the
 * table itself, the workq kqueue, the cwd/root vnode references and
 * the dynamic kqueue hash.  The embedded locks survive until
 * fdt_destroy().
 */
void
fdt_invalidate(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, **ofiles;
	struct kqworkq *kqwq = NULL;
	vnode_t vn1 = NULL, vn2 = NULL;
	struct kqwllist *kqhash = NULL;
	u_long kqhashmask = 0;
	int n_files = 0;

	/*
	 * deallocate all the knotes up front and claim empty
	 * tables to make any subsequent kqueue closes faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/*
	 * dealloc all workloops that have outstanding retains
	 * when created with scheduling parameters.
	 */
	kqworkloops_dealloc(p);

	proc_fdlock(p);

	/* close file descriptors */
	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
		for (int i = fdp->fd_afterlast; i-- > 0;) {
			if ((fp = fdp->fd_ofiles[i]) != NULL) {
				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
					panic("fdfree: found fp with UF_RESERVED");
				}
				/* fp_close_and_unlock() drops the fdlock; retake it */
				fp_close_and_unlock(p, i, fp, 0);
				proc_fdlock(p);
			}
		}
	}

	/* detach everything under the lock; free it after unlocking */
	n_files = fdp->fd_nfiles;
	ofiles = fdp->fd_ofiles;
	kqwq = fdp->fd_wqkqueue;
	vn1 = fdp->fd_cdir;
	vn2 = fdp->fd_rdir;

	fdp->fd_ofileflags = NULL;
	fdp->fd_ofiles = NULL;
	fdp->fd_nfiles = 0;
	fdp->fd_wqkqueue = NULL;
	fdp->fd_cdir = NULL;
	fdp->fd_rdir = NULL;

	proc_fdunlock(p);

	lck_mtx_lock(&fdp->fd_knhashlock);

	kqhash = fdp->fd_kqhash;
	kqhashmask = fdp->fd_kqhashmask;

	fdp->fd_kqhash = 0;
	fdp->fd_kqhashmask = 0;

	lck_mtx_unlock(&fdp->fd_knhashlock);

	kheap_free(KM_OFILETABL, ofiles, n_files * OFILESIZE);

	if (kqwq) {
		kqworkq_dealloc(kqwq);
	}
	if (vn1) {
		vnode_rele(vn1);
	}
	if (vn2) {
		vnode_rele(vn2);
	}
	if (kqhash) {
		/* every chain must already be empty after knotes_dealloc() */
		for (uint32_t i = 0; i <= kqhashmask; i++) {
			assert(LIST_EMPTY(&kqhash[i]));
		}
		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
	}
}
1005
1006
1007 struct fileproc *
fileproc_alloc_init(void)1008 fileproc_alloc_init(void)
1009 {
1010 struct fileproc *fp;
1011
1012 fp = zalloc_flags(fp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1013 os_ref_init(&fp->fp_iocount, &f_refgrp);
1014 return fp;
1015 }
1016
1017
/*
 * fileproc_free
 *
 * Release the final iocount reference on `fp`, un-guard it if it was
 * guarded, and return it to fp_zone.  DEVELOPMENT/DEBUG kernels panic
 * if any iocount references remain.
 */
void
fileproc_free(struct fileproc *fp)
{
	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
#if DEVELOPMENT || DEBUG
	if (0 != refc) {
		panic("%s: pid %d refc: %u != 0",
		    __func__, proc_pid(current_proc()), refc);
	}
#endif
	if (fp->fp_guard_attrs) {
		guarded_fileproc_unguard(fp);
	}
	assert(fp->fp_wset == NULL);
	zfree(fp_zone, fp);
}
1034
1035
1036 /*
1037 * Statistics counter for the number of times a process calling fdalloc()
1038 * has resulted in an expansion of the per process open file table.
1039 *
1040 * XXX This would likely be of more use if it were per process
1041 */
1042 int fdexpand;
1043
1044 #if CONFIG_PROC_RESOURCE_LIMITS
1045 /*
1046 * Should be called only with the proc_fdlock held.
1047 */
1048 void
fd_check_limit_exceeded(struct filedesc * fdp)1049 fd_check_limit_exceeded(struct filedesc *fdp)
1050 {
1051 #if DIAGNOSTIC
1052 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1053 #endif
1054 if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
1055 (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
1056 fd_above_soft_limit_send_notification(fdp);
1057 act_set_astproc_resource(current_thread());
1058 } else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
1059 (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
1060 fd_above_hard_limit_send_notification(fdp);
1061 act_set_astproc_resource(current_thread());
1062 }
1063 }
1064 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1065
1066 /*
1067 * fdalloc
1068 *
1069 * Description: Allocate a file descriptor for the process.
1070 *
1071 * Parameters: p Process to allocate the fd in
1072 * want The fd we would prefer to get
1073 * result Pointer to fd we got
1074 *
1075 * Returns: 0 Success
1076 * EMFILE
1077 * ENOMEM
1078 *
1079 * Implicit returns:
1080 * *result (modified) The fd which was allocated
1081 */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = &p->p_fd;
	int i;
	int last, numfiles, oldnfiles;
	struct fileproc **newofiles, **ofiles;
	char *newofileflags;
	int lim = proc_limitgetcur_nofile(p);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile. If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/* loop: each expansion attempt retries the scan from the top */
	for (;;) {
		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
		if ((i = want) < fdp->fd_freefile) {
			i = fdp->fd_freefile;
		}
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				/* claim the slot; caller will install the fileproc */
				procfdtbl_reservefd(p, i);
				if (i >= fdp->fd_afterlast) {
					fdp->fd_afterlast = i + 1;
				}
				if (want <= fdp->fd_freefile) {
					fdp->fd_freefile = i;
				}
				*result = i;
#if CONFIG_PROC_RESOURCE_LIMITS
				fdp->fd_nfiles_open++;
				fd_check_limit_exceeded(fdp);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
				return 0;
			}
		}

		/*
		 * No space in current array. Expand?
		 */
		if ((rlim_t)fdp->fd_nfiles >= lim) {
			return EMFILE;
		}
		if (fdp->fd_nfiles < NDEXTENT) {
			numfiles = NDEXTENT;
		} else {
			numfiles = 2 * fdp->fd_nfiles;
		}
		/* Enforce lim */
		if ((rlim_t)numfiles > lim) {
			numfiles = (int)lim;
		}
		/* allocation may block: drop the fdlock around it */
		proc_fdunlock(p);
		newofiles = kheap_alloc(KM_OFILETABL, numfiles * OFILESIZE,
		    Z_WAITOK);
		proc_fdlock(p);
		if (newofiles == NULL) {
			return ENOMEM;
		}
		if (fdp->fd_nfiles >= numfiles) {
			/* someone else grew the table while we were unlocked */
			kheap_free(KM_OFILETABL, newofiles, numfiles * OFILESIZE);
			continue;
		}
		newofileflags = (char *) &newofiles[numfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		(void) memcpy(newofiles, fdp->fd_ofiles,
		    oldnfiles * sizeof(*fdp->fd_ofiles));
		(void) memset(&newofiles[oldnfiles], 0,
		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

		(void) memcpy(newofileflags, fdp->fd_ofileflags,
		    oldnfiles * sizeof(*fdp->fd_ofileflags));
		(void) memset(&newofileflags[oldnfiles], 0,
		    (numfiles - oldnfiles) *
		    sizeof(*fdp->fd_ofileflags));
		ofiles = fdp->fd_ofiles;
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		kheap_free(KM_OFILETABL, ofiles, oldnfiles * OFILESIZE);
		fdexpand++;
	}
}
1174
1175
1176 #pragma mark fileprocs
1177
1178 void
fileproc_modify_vflags(struct fileproc * fp,fileproc_vflags_t vflags,boolean_t clearflags)1179 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1180 {
1181 if (clearflags) {
1182 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1183 } else {
1184 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1185 }
1186 }
1187
1188 fileproc_vflags_t
fileproc_get_vflags(struct fileproc * fp)1189 fileproc_get_vflags(struct fileproc *fp)
1190 {
1191 return os_atomic_load(&fp->fp_vflags, relaxed);
1192 }
1193
1194 /*
1195 * falloc_withinit
1196 *
1197 * Create a new open file structure and allocate
1198 * a file descriptor for the process that refers to it.
1199 *
1200 * Returns: 0 Success
1201 *
1202 * Description: Allocate an entry in the per process open file table and
1203 * return the corresponding fileproc and fd.
1204 *
1205 * Parameters: p The process in whose open file
1206 * table the fd is to be allocated
1207 * resultfp Pointer to fileproc pointer
1208 * return area
1209 * resultfd Pointer to fd return area
1210 * ctx VFS context
1211 * fp_zalloc fileproc allocator to use
1212 * crarg allocator args
1213 *
1214 * Returns: 0 Success
1215 * ENFILE Too many open files in system
1216 * fdalloc:EMFILE Too many open files in process
1217 * fdalloc:ENOMEM M_OFILETABL zone exhausted
1218 * ENOMEM fp_zone or fg_zone zone
1219 * exhausted
1220 *
1221 * Implicit returns:
 *		*resultfp (modified)	Returned fileproc pointer
1223 * *resultfd (modified) Returned fd
1224 *
1225 * Notes: This function takes separate process and context arguments
1226 * solely to support kern_exec.c; otherwise, it would take
1227 * neither, and use the vfs_context_current() routine internally.
1228 */
int
falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
    vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
{
	struct fileproc *fp;
	struct fileglob *fg;
	int error, nfd;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

	/* Make sure we don't go beyond the system-wide limit */
	if (nfiles >= maxfiles) {
		tablefull("file");
		return ENFILE;
	}

	proc_fdlock(p);

	/* fdalloc will make sure the process stays below per-process limit */
	if ((error = fdalloc(p, 0, &nfd))) {
		proc_fdunlock(p);
		return error;
	}

#if CONFIG_MACF
	/* MAC hook: the policy may veto creating a new file. */
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_create(cred);
	kauth_cred_unref(&cred);
	if (error) {
		/*
		 * NOTE(review): the fd reserved by fdalloc() above does not
		 * appear to be released on this error path — confirm whether
		 * an fdrelse() is required here.
		 */
		proc_fdunlock(p);
		return error;
	}
#endif

	/*
	 * Allocate the fileproc and fileglob outside the fd lock,
	 * since both allocations may block.
	 */
	proc_fdunlock(p);

	fp = fileproc_alloc_init();
	/* Give the caller's initializer first crack at the new fileproc. */
	if (fp_init) {
		fp_init(fp, initarg);
	}

	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
	lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);

	/* Take an extra I/O reference that is returned to the caller. */
	os_ref_retain_locked(&fp->fp_iocount);
	os_ref_init_raw(&fg->fg_count, &f_refgrp);
	/* No real file ops yet; whoever completes the open installs them. */
	fg->fg_ops = &uninitops;
	fp->fp_glob = fg;

	kauth_cred_ref(ctx->vc_ucred);

	fp->f_cred = ctx->vc_ucred;

	os_atomic_inc(&nfiles, relaxed);

	proc_fdlock(p);

	/* Publish the fileproc into the slot fdalloc() reserved. */
	p->p_fd.fd_ofiles[nfd] = fp;

	proc_fdunlock(p);

	if (resultfp) {
		*resultfp = fp;
	}
	if (resultfd) {
		*resultfd = nfd;
	}

	return 0;
}
1306
1307 int
falloc(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx)1308 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
1309 {
1310 return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
1311 }
1312
1313
1314 /*
1315 * fp_free
1316 *
1317 * Description: Release the fd and free the fileproc associated with the fd
1318 * in the per process open file table of the specified process;
1319 * these values must correspond.
1320 *
1321 * Parameters: p Process containing fd
1322 * fd fd to be released
1323 * fp fileproc to be freed
1324 */
void
fp_free(proc_t p, int fd, struct fileproc * fp)
{
	/* Give the descriptor slot back to the fd table first... */
	proc_fdlock_spin(p);
	fdrelse(p, fd);
	proc_fdunlock(p);

	/* ...then tear down the fileglob and the fileproc itself. */
	fg_free(fp->fp_glob);
	/* Drop the remaining I/O reference before freeing the fileproc. */
	os_ref_release_live(&fp->fp_iocount);
	fileproc_free(fp);
}
1336
1337
1338 struct fileproc *
fp_get_noref_locked(proc_t p,int fd)1339 fp_get_noref_locked(proc_t p, int fd)
1340 {
1341 struct filedesc *fdp = &p->p_fd;
1342 struct fileproc *fp;
1343
1344 if (fd < 0 || fd >= fdp->fd_nfiles ||
1345 (fp = fdp->fd_ofiles[fd]) == NULL ||
1346 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1347 return NULL;
1348 }
1349
1350 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1351 return fp;
1352 }
1353
struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp = NULL;

	/*
	 * The caller asserts it already holds an I/O reference on fd's
	 * fileproc: the entry must exist, its iocount must exceed one,
	 * and UF_RESERVED is tolerated only while the fd is being closed
	 * (UF_CLOSING also set).  Anything else means the caller's
	 * reference accounting is broken — panic rather than risk
	 * corrupting memory through a stale pointer.
	 */
	if (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    os_ref_get_count(&fp->fp_iocount) <= 1 ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
		    __func__, fd, fp);
	}

	/* Trap if fp does not point into the fileproc zone. */
	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
	return fp;
}
1372
1373
1374 /*
1375 * fp_lookup
1376 *
1377 * Description: Get fileproc pointer for a given fd from the per process
1378 * open file table of the specified process and if successful,
1379 * increment the fp_iocount
1380 *
1381 * Parameters: p Process in which fd lives
1382 * fd fd to get information for
1383 * resultfp Pointer to result fileproc
1384 * pointer area, or 0 if none
1385 * locked !0 if the caller holds the
1386 * proc_fdlock, 0 otherwise
1387 *
1388 * Returns: 0 Success
1389 * EBADF Bad file descriptor
1390 *
1391 * Implicit returns:
1392 * *resultfp (modified) Fileproc pointer
1393 *
1394 * Locks: If the argument 'locked' is non-zero, then the caller is
1395 * expected to have taken and held the proc_fdlock; if it is
1396 * zero, than this routine internally takes and drops this lock.
1397 */
1398 int
fp_lookup(proc_t p,int fd,struct fileproc ** resultfp,int locked)1399 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1400 {
1401 struct filedesc *fdp = &p->p_fd;
1402 struct fileproc *fp;
1403
1404 if (!locked) {
1405 proc_fdlock_spin(p);
1406 }
1407 if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1408 (fp = fdp->fd_ofiles[fd]) == NULL ||
1409 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1410 if (!locked) {
1411 proc_fdunlock(p);
1412 }
1413 return EBADF;
1414 }
1415
1416 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1417 os_ref_retain_locked(&fp->fp_iocount);
1418
1419 if (resultfp) {
1420 *resultfp = fp;
1421 }
1422 if (!locked) {
1423 proc_fdunlock(p);
1424 }
1425
1426 return 0;
1427 }
1428
1429
1430 int
fp_get_ftype(proc_t p,int fd,file_type_t ftype,int err,struct fileproc ** fpp)1431 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1432 {
1433 struct filedesc *fdp = &p->p_fd;
1434 struct fileproc *fp;
1435
1436 proc_fdlock_spin(p);
1437 if (fd < 0 || fd >= fdp->fd_nfiles ||
1438 (fp = fdp->fd_ofiles[fd]) == NULL ||
1439 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1440 proc_fdunlock(p);
1441 return EBADF;
1442 }
1443
1444 if (fp->f_type != ftype) {
1445 proc_fdunlock(p);
1446 return err;
1447 }
1448
1449 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1450 os_ref_retain_locked(&fp->fp_iocount);
1451 proc_fdunlock(p);
1452
1453 *fpp = fp;
1454 return 0;
1455 }
1456
1457
1458 /*
1459 * fp_drop
1460 *
1461 * Description: Drop the I/O reference previously taken by calling fp_lookup
1462 * et. al.
1463 *
1464 * Parameters: p Process in which the fd lives
1465 * fd fd associated with the fileproc
1466 * fp fileproc on which to set the
1467 * flag and drop the reference
1468 * locked flag to internally take and
1469 * drop proc_fdlock if it is not
1470 * already held by the caller
1471 *
1472 * Returns: 0 Success
1473 * EBADF Bad file descriptor
1474 *
1475 * Locks: This function internally takes and drops the proc_fdlock for
1476 * the supplied process if 'locked' is non-zero, and assumes that
1477 * the caller already holds this lock if 'locked' is non-zero.
1478 *
1479 * Notes: The fileproc must correspond to the fd in the supplied proc
1480 */
int
fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
{
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * When fp is FILEPROC_NULL, resolve it from the fd table; the slot
	 * must be populated and not in flux (UF_RESERVED is tolerated only
	 * when UF_CLOSING is also set, i.e. the fd is mid-close).
	 */
	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/*
	 * Drop the I/O reference.  When ours was the last extra one
	 * (exactly one reference remains), clear any select conflict and
	 * arrange to wake a drainer blocked in fileproc_drain().
	 */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	if (!locked) {
		proc_fdunlock(p);
	}
	/* Wake outside the lock so the woken thread doesn't immediately block. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}

	return 0;
}
1519
1520
1521 /*
1522 * fileproc_drain
1523 *
1524 * Description: Drain out pending I/O operations
1525 *
1526 * Parameters: p Process closing this file
1527 * fp fileproc struct for the open
1528 * instance on the file
1529 *
1530 * Returns: void
1531 *
1532 * Locks: Assumes the caller holds the proc_fdlock
1533 *
1534 * Notes: For character devices, this occurs on the last close of the
1535 * device; for all other file descriptors, this occurs on each
1536 * close to prevent fd's from being closed out from under
1537 * operations currently in progress and blocked
1538 *
1539 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
1540 * regarding their use and interaction with this function.
1541 */
static void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct filedesc *fdp = &p->p_fd;
	struct vfs_context context;
	thread_t thread;
	bool is_current_proc;

	is_current_proc = (p == current_proc());

	/*
	 * Pick a thread for the drain context: the current thread when
	 * draining our own fd, otherwise borrow (and reference) a thread
	 * from the target process.
	 */
	if (!is_current_proc) {
		proc_lock(p);
		thread = proc_thread(p);	/* XXX */
		thread_reference(thread);
		proc_unlock(p);
	} else {
		thread = current_thread();
	}

	context.vc_thread = thread;
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* Set the vflag for drain */
	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);

	/*
	 * Wait for all other I/O references to go away.  Each pass kicks
	 * pending operations via fo_drain() and interrupts any select()
	 * waiters parked on this fileproc, then sleeps until a reference
	 * holder wakes us from fp_drop()/file_drop().
	 */
	while (os_ref_get_count(&fp->fp_iocount) > 1) {
		lck_mtx_convert_spin(&fdp->fd_lock);

		fo_drain(fp, &context);
		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
			struct waitq_set *wqset;

			/* Guarded fds keep their wait queue set in fp_guard. */
			if (fp->fp_guard_attrs) {
				wqset = fp->fp_guard->fpg_wset;
			} else {
				wqset = fp->fp_wset;
			}
			if (waitq_wakeup64_all((struct waitq *)wqset, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
				    wqset, fp->fp_guard_attrs ? "guarded " : "", fp);
			}
		}
		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad select_conflict_queue");
			}
		}
		/* Ask reference droppers to wake us, then sleep on the fd lock. */
		fdp->fd_fpdrainwait = 1;
		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
	}
#if DIAGNOSTIC
	if ((fp->fp_flags & FP_INSELECT) != 0) {
		panic("FP_INSELECT set on drained fp");
	}
#endif
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		fp->fp_flags &= ~FP_SELCONFLICT;
	}

	/* Balance the thread_reference() taken above. */
	if (!is_current_proc) {
		thread_deallocate(thread);
	}
}
1607
1608
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileglob *fg = fp->fp_glob;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/*
	 * Drop the fd lock for callouts that may block: close
	 * notifications to fileop listeners and async I/O cancellation.
	 */
	if ((fp->fp_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
		proc_fdunlock(p);

		if ((FILEGLOB_DTYPE(fg) == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			if (vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
				u_int fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);
#if CONFIG_MACF
				/* MAC hook: notify policies that the file is closing. */
				cred = kauth_cred_proc_ref(p);
				mac_file_notify_close(cred, fp->fp_glob);
				kauth_cred_unref(&cred);
#endif
				vnode_put((vnode_t)fg_get_data(fg));
			}
		}
		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes registered against this fd. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	/* Wait here until every outstanding I/O reference is dropped. */
	fileproc_drain(p, fp);

	if (flags & FD_DUP2RESV) {
		/* dup2() keeps the slot reserved for itself: only clear UF_CLOSING. */
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
	}

	fileproc_free(fp);

	/* Drop the table's reference on the fileglob; the last drop closes. */
	return fg_drop(p, fg);
}
1706
1707
1708 /*
1709 * dupfdopen
1710 *
1711 * Description: Duplicate the specified descriptor to a free descriptor;
1712 * this is the second half of fdopen(), above.
1713 *
1714 * Parameters: p current process pointer
1715 * indx fd to dup to
1716 * dfd fd to dup from
1717 * mode mode to set on new fd
1718 * error command code
1719 *
1720 * Returns: 0 Success
1721 * EBADF Source fd is bad
1722 * EACCES Requested mode not allowed
1723 * !0 'error', if not ENODEV or
1724 * ENXIO
1725 *
1726 * Notes: XXX This is not thread safe; see fdopen() above
1727 */
int
dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *wfp;
	struct fileproc *fp;
#if CONFIG_MACF
	int myerror;
#endif

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, reject.  Note, check for new == old is necessary as
	 * falloc could allocate an already closed to-be-dup'd descriptor
	 * as the new descriptor.
	 */
	proc_fdlock(p);

	fp = fdp->fd_ofiles[indx];
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
		proc_fdunlock(p);
		return EBADF;
	}
#if CONFIG_MACF
	/* MAC hook: policy may veto duplicating dfd. */
	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
	if (myerror) {
		proc_fdunlock(p);
		return myerror;
	}
#endif
	/*
	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
	 *
	 * Any other error code is just returned.
	 *
	 * NOTE(review): historical BSD also handled ENXIO here by stealing
	 * the file structure away from (dfd); no such case exists in this
	 * switch — confirm the removal was intentional and update callers'
	 * expectations accordingly.
	 */
	switch (error) {
	case ENODEV:
		/* Guarded fds may not be dup'd. */
		if (fp_isguarded(wfp, GUARD_DUP)) {
			proc_fdunlock(p);
			return EPERM;
		}

		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			proc_fdunlock(p);
			return EACCES;
		}
		if (indx >= fdp->fd_afterlast) {
			fdp->fd_afterlast = indx + 1;
		}

		/* Swap indx's placeholder fileglob for a reference on dfd's. */
		if (fp->fp_glob) {
			fg_free(fp->fp_glob);
		}
		fg_ref(p, wfp->fp_glob);
		fp->fp_glob = wfp->fp_glob;
		/*
		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
		 * unlike dup(), dup2() or fcntl(F_DUPFD).
		 *
		 * open1() already handled O_CLO{EXEC,FORK}
		 */
		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));

		/* Publish indx and release the I/O reference held on fp. */
		procfdtbl_releasefd(p, indx, NULL);
		fp_drop(p, indx, fp, 1);
		proc_fdunlock(p);
		return 0;

	default:
		proc_fdunlock(p);
		return error;
	}
	/* NOTREACHED */
}
1816
1817
1818 #pragma mark KPIS (sys/file.h)
1819
1820 /*
1821 * fg_get_vnode
1822 *
1823 * Description: Return vnode associated with the file structure, if
1824 * any. The lifetime of the returned vnode is bound to
1825 * the lifetime of the file structure.
1826 *
1827 * Parameters: fg Pointer to fileglob to
1828 * inspect
1829 *
1830 * Returns: vnode_t
1831 */
1832 vnode_t
fg_get_vnode(struct fileglob * fg)1833 fg_get_vnode(struct fileglob *fg)
1834 {
1835 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1836 return (vnode_t)fg_get_data(fg);
1837 } else {
1838 return NULL;
1839 }
1840 }
1841
1842
1843 /*
1844 * fp_getfvp
1845 *
1846 * Description: Get fileproc and vnode pointer for a given fd from the per
1847 * process open file table of the specified process, and if
1848 * successful, increment the fp_iocount
1849 *
1850 * Parameters: p Process in which fd lives
1851 * fd fd to get information for
1852 * resultfp Pointer to result fileproc
1853 * pointer area, or 0 if none
1854 * resultvp Pointer to result vnode pointer
1855 * area, or 0 if none
1856 *
1857 * Returns: 0 Success
1858 * EBADF Bad file descriptor
1859 * ENOTSUP fd does not refer to a vnode
1860 *
1861 * Implicit returns:
1862 * *resultfp (modified) Fileproc pointer
1863 * *resultvp (modified) vnode pointer
1864 *
1865 * Notes: The resultfp and resultvp fields are optional, and may be
1866 * independently specified as NULL to skip returning information
1867 *
1868 * Locks: Internally takes and releases proc_fdlock
1869 */
1870 int
fp_getfvp(proc_t p,int fd,struct fileproc ** resultfp,struct vnode ** resultvp)1871 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1872 {
1873 struct fileproc *fp;
1874 int error;
1875
1876 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1877 if (error == 0) {
1878 if (resultfp) {
1879 *resultfp = fp;
1880 }
1881 if (resultvp) {
1882 *resultvp = (struct vnode *)fp_get_data(fp);
1883 }
1884 }
1885
1886 return error;
1887 }
1888
1889
1890 /*
1891 * fp_get_pipe_id
1892 *
1893 * Description: Get pipe id for a given fd from the per process open file table
1894 * of the specified process.
1895 *
1896 * Parameters: p Process in which fd lives
1897 * fd fd to get information for
1898 * result_pipe_id Pointer to result pipe id
1899 *
1900 * Returns: 0 Success
 *		EINVAL			NULL pointer arguments passed
1902 * fp_lookup:EBADF Bad file descriptor
1903 * ENOTSUP fd does not refer to a pipe
1904 *
1905 * Implicit returns:
1906 * *result_pipe_id (modified) pipe id
1907 *
1908 * Locks: Internally takes and releases proc_fdlock
1909 */
1910 int
fp_get_pipe_id(proc_t p,int fd,uint64_t * result_pipe_id)1911 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1912 {
1913 struct fileproc *fp = FILEPROC_NULL;
1914 struct fileglob *fg = NULL;
1915 int error = 0;
1916
1917 if (p == NULL || result_pipe_id == NULL) {
1918 return EINVAL;
1919 }
1920
1921 proc_fdlock(p);
1922 if ((error = fp_lookup(p, fd, &fp, 1))) {
1923 proc_fdunlock(p);
1924 return error;
1925 }
1926 fg = fp->fp_glob;
1927
1928 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
1929 *result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
1930 } else {
1931 error = ENOTSUP;
1932 }
1933
1934 fp_drop(p, fd, fp, 1);
1935 proc_fdunlock(p);
1936 return error;
1937 }
1938
1939
1940 /*
1941 * file_vnode
1942 *
1943 * Description: Given an fd, look it up in the current process's per process
1944 * open file table, and return its internal vnode pointer.
1945 *
1946 * Parameters: fd fd to obtain vnode from
1947 * vpp pointer to vnode return area
1948 *
1949 * Returns: 0 Success
1950 * EINVAL The fd does not refer to a
1951 * vnode fileproc entry
1952 * fp_lookup:EBADF Bad file descriptor
1953 *
1954 * Implicit returns:
1955 * *vpp (modified) Returned vnode pointer
1956 *
1957 * Locks: This function internally takes and drops the proc_fdlock for
1958 * the current process
1959 *
1960 * Notes: If successful, this function increments the fp_iocount on the
1961 * fd's corresponding fileproc.
1962 *
1963 * The fileproc referenced is not returned; because of this, care
1964 * must be taken to not drop the last reference (e.g. by closing
1965 * the file). This is inherently unsafe, since the reference may
1966 * not be recoverable from the vnode, if there is a subsequent
1967 * close that destroys the associate fileproc. The caller should
1968 * therefore retain their own reference on the fileproc so that
1969 * the fp_iocount can be dropped subsequently. Failure to do this
1970 * can result in the returned pointer immediately becoming invalid
1971 * following the call.
1972 *
1973 * Use of this function is discouraged.
1974 */
1975 int
file_vnode(int fd,struct vnode ** vpp)1976 file_vnode(int fd, struct vnode **vpp)
1977 {
1978 return file_vnode_withvid(fd, vpp, NULL);
1979 }
1980
1981
1982 /*
1983 * file_vnode_withvid
1984 *
1985 * Description: Given an fd, look it up in the current process's per process
1986 * open file table, and return its internal vnode pointer.
1987 *
1988 * Parameters: fd fd to obtain vnode from
1989 * vpp pointer to vnode return area
1990 * vidp pointer to vid of the returned vnode
1991 *
1992 * Returns: 0 Success
1993 * EINVAL The fd does not refer to a
1994 * vnode fileproc entry
1995 * fp_lookup:EBADF Bad file descriptor
1996 *
1997 * Implicit returns:
1998 * *vpp (modified) Returned vnode pointer
1999 *
2000 * Locks: This function internally takes and drops the proc_fdlock for
2001 * the current process
2002 *
2003 * Notes: If successful, this function increments the fp_iocount on the
2004 * fd's corresponding fileproc.
2005 *
2006 * The fileproc referenced is not returned; because of this, care
2007 * must be taken to not drop the last reference (e.g. by closing
2008 * the file). This is inherently unsafe, since the reference may
2009 * not be recoverable from the vnode, if there is a subsequent
2010 * close that destroys the associate fileproc. The caller should
2011 * therefore retain their own reference on the fileproc so that
2012 * the fp_iocount can be dropped subsequently. Failure to do this
2013 * can result in the returned pointer immediately becoming invalid
2014 * following the call.
2015 *
2016 * Use of this function is discouraged.
2017 */
2018 int
file_vnode_withvid(int fd,struct vnode ** vpp,uint32_t * vidp)2019 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2020 {
2021 struct fileproc *fp;
2022 int error;
2023
2024 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2025 if (error == 0) {
2026 if (vpp) {
2027 *vpp = (struct vnode *)fp_get_data(fp);
2028 }
2029 if (vidp) {
2030 *vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2031 }
2032 }
2033 return error;
2034 }
2035
2036 /*
2037 * file_socket
2038 *
2039 * Description: Given an fd, look it up in the current process's per process
2040 * open file table, and return its internal socket pointer.
2041 *
2042 * Parameters: fd fd to obtain vnode from
2043 * sp pointer to socket return area
2044 *
2045 * Returns: 0 Success
2046 * ENOTSOCK Not a socket
2047 * fp_lookup:EBADF Bad file descriptor
2048 *
2049 * Implicit returns:
2050 * *sp (modified) Returned socket pointer
2051 *
2052 * Locks: This function internally takes and drops the proc_fdlock for
2053 * the current process
2054 *
2055 * Notes: If successful, this function increments the fp_iocount on the
2056 * fd's corresponding fileproc.
2057 *
2058 * The fileproc referenced is not returned; because of this, care
2059 * must be taken to not drop the last reference (e.g. by closing
2060 * the file). This is inherently unsafe, since the reference may
2061 * not be recoverable from the socket, if there is a subsequent
2062 * close that destroys the associate fileproc. The caller should
2063 * therefore retain their own reference on the fileproc so that
2064 * the fp_iocount can be dropped subsequently. Failure to do this
2065 * can result in the returned pointer immediately becoming invalid
2066 * following the call.
2067 *
2068 * Use of this function is discouraged.
2069 */
2070 int
file_socket(int fd,struct socket ** sp)2071 file_socket(int fd, struct socket **sp)
2072 {
2073 struct fileproc *fp;
2074 int error;
2075
2076 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2077 if (error == 0) {
2078 if (sp) {
2079 *sp = (struct socket *)fp_get_data(fp);
2080 }
2081 }
2082 return error;
2083 }
2084
2085
2086 /*
2087 * file_flags
2088 *
2089 * Description: Given an fd, look it up in the current process's per process
2090 * open file table, and return its fileproc's flags field.
2091 *
2092 * Parameters: fd fd whose flags are to be
2093 * retrieved
2094 * flags pointer to flags data area
2095 *
2096 * Returns: 0 Success
 *		EBADF			Bad file descriptor
2099 *
2100 * Implicit returns:
2101 * *flags (modified) Returned flags field
2102 *
2103 * Locks: This function internally takes and drops the proc_fdlock for
2104 * the current process
2105 */
2106 int
file_flags(int fd,int * flags)2107 file_flags(int fd, int *flags)
2108 {
2109 proc_t p = current_proc();
2110 struct fileproc *fp;
2111 int error = EBADF;
2112
2113 proc_fdlock_spin(p);
2114 fp = fp_get_noref_locked(p, fd);
2115 if (fp) {
2116 *flags = (int)fp->f_flag;
2117 error = 0;
2118 }
2119 proc_fdunlock(p);
2120
2121 return error;
2122 }
2123
2124
2125 /*
2126 * file_drop
2127 *
2128 * Description: Drop an iocount reference on an fd, and wake up any waiters
2129 * for draining (i.e. blocked in fileproc_drain() called during
2130 * the last attempt to close a file).
2131 *
2132 * Parameters: fd fd on which an ioreference is
2133 * to be dropped
2134 *
2135 * Returns: 0 Success
2136 *
2137 * Description: Given an fd, look it up in the current process's per process
2138 * open file table, and drop it's fileproc's fp_iocount by one
2139 *
2140 * Notes: This is intended as a corresponding operation to the functions
2141 * file_vnode() and file_socket() operations.
2142 *
2143 * If the caller can't possibly hold an I/O reference,
2144 * this function will panic the kernel rather than allowing
2145 * for memory corruption. Callers should always call this
2146 * because they acquired an I/O reference on this file before.
2147 *
2148 * Use of this function is discouraged.
2149 */
int
file_drop(int fd)
{
	struct fileproc *fp;
	proc_t p = current_proc();
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	proc_fdlock_spin(p);
	/* Panics if the caller does not actually hold an I/O reference. */
	fp = fp_get_noref_locked_with_iocount(p, fd);

	/*
	 * Drop the I/O reference.  When ours was the last extra one
	 * (exactly one reference remains), clear any select conflict and
	 * arrange to wake a drainer blocked in fileproc_drain().
	 */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	proc_fdunlock(p);

	/* Wake outside the lock so the woken thread doesn't immediately block. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}
	return 0;
}
2178
2179
/*
 * fd_rdwr
 *
 * Description: Perform a read or write of up to 'len' bytes at 'offset'
 *		on the vnode, pipe or socket backing 'fd', on behalf of
 *		the current process.
 *
 * Parameters:	fd		fd to transfer on
 *		rw		UIO_READ or UIO_WRITE
 *		base		source/destination buffer address
 *		len		number of bytes to transfer
 *		segflg		address space 'base' lives in
 *		offset		starting offset (superseded by IO_APPEND)
 *		io_flg		I/O flags (IO_APPEND honored here)
 *		aresid		optional residual-byte-count return area
 *
 * Returns:	0		Success
 *		EINVAL		fd is not a vnode, pipe or socket
 *		EBADF		fd not open for the requested direction
 *		EIO		short transfer and aresid was NULL
 *	fp_lookup:EBADF		Bad file descriptor
 */
int
fd_rdwr(
	int fd,
	enum uio_rw rw,
	uint64_t base,
	int64_t len,
	enum uio_seg segflg,
	off_t offset,
	int io_flg,
	int64_t *aresid)
{
	struct fileproc *fp;
	proc_t p;
	int error = 0;
	int flags = 0;
	int spacetype;
	uio_t auio = NULL;
	uio_stackbuf_t uio_buf[UIO_SIZEOF(1)];
	struct vfs_context context = *(vfs_context_current());

	p = current_proc();

	/* Takes an I/O reference on fp; released at 'out'. */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		return error;
	}

	/* Only vnodes, pipes and sockets support this interface. */
	switch (FILEGLOB_DTYPE(fp->fp_glob)) {
	case DTYPE_VNODE:
	case DTYPE_PIPE:
	case DTYPE_SOCKET:
		break;
	default:
		error = EINVAL;
		goto out;
	}
	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
		error = EBADF;
		goto out;
	}

	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
		error = EBADF;
		goto out;
	}

	/* Charge the I/O to the file's credential, not the caller's. */
	context.vc_ucred = fp->fp_glob->fg_cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	} else {
		spacetype = UIO_SYSSPACE;
	}

	/* Single-iovec uio built in the on-stack buffer; no heap allocation. */
	auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));

	uio_addiov(auio, (user_addr_t)base, (user_size_t)len);

	/* FOF_OFFSET: use 'offset' rather than the file's current offset. */
	if (!(io_flg & IO_APPEND)) {
		flags = FOF_OFFSET;
	}

	if (rw == UIO_WRITE) {
		user_ssize_t orig_resid = uio_resid(auio);
		error = fo_write(fp, auio, flags, &context);
		/* Record that data was written so close() can notify listeners. */
		if (uio_resid(auio) < orig_resid) {
			os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
		}
	} else {
		error = fo_read(fp, auio, flags, &context);
	}

	if (aresid) {
		*aresid = uio_resid(auio);
	} else if (uio_resid(auio) && error == 0) {
		/* Caller didn't ask for the residual: treat a short I/O as an error. */
		error = EIO;
	}
out:
	fp_drop(p, fd, fp, 0);
	return error;
}
2261
2262
2263 #pragma mark syscalls
2264
2265 #ifndef HFS_GET_BOOT_INFO
2266 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
2267 #endif
2268
2269 #ifndef HFS_SET_BOOT_INFO
2270 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
2271 #endif
2272
2273 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
2274 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
2275 #endif
2276
2277 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
2278 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
2279 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
2280 ? 1 : 0)
2281
2282 /*
2283 * sys_getdtablesize
2284 *
2285 * Description: Returns the per process maximum size of the descriptor table
2286 *
2287 * Parameters: p Process being queried
2288 * retval Pointer to the call return area
2289 *
2290 * Returns: 0 Success
2291 *
2292 * Implicit returns:
2293 * *retval (modified) Size of dtable
2294 */
2295 int
sys_getdtablesize(proc_t p,__unused struct getdtablesize_args * uap,int32_t * retval)2296 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2297 {
2298 *retval = proc_limitgetcur_nofile(p);
2299 return 0;
2300 }
2301
2302
2303 /*
2304 * check_file_seek_range
2305 *
2306 * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
2307 *
2308 * Parameters: fl Flock structure.
2309 * cur_file_offset Current offset in the file.
2310 *
2311 * Returns: 0 on Success.
2312 * EOVERFLOW on overflow.
2313 * EINVAL on offset less than zero.
2314 */
2315
/*
 * check_file_seek_range
 *
 * Validate that a flock's byte range lies within [0, LLONG_MAX].
 *
 * Parameters:	fl			lock description to validate
 *		cur_file_offset		current file offset (for SEEK_CUR)
 *
 * Returns:	0		range is valid
 *		EOVERFLOW	start or end marker exceeds LLONG_MAX
 *		EINVAL		start or end marker is negative
 */
static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	switch (fl->l_whence) {
	case SEEK_CUR: {
		/* Does the absolute start (l_start + offset) overflow int64? */
		if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
			/* Negative start is invalid; positive overflow is EOVERFLOW. */
			return (fl->l_start < 0) ? EINVAL : EOVERFLOW;
		}
		off_t abs_start = fl->l_start + cur_file_offset;
		if (abs_start < 0) {
			return EINVAL;
		}
		/* Positive length: end marker must not exceed LLONG_MAX. */
		if ((fl->l_len > 0) &&
		    CHECK_ADD_OVERFLOW_INT64L(abs_start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* Non-positive length: end marker must not go negative. */
		if ((fl->l_len <= 0) && (abs_start + fl->l_len < 0)) {
			return EINVAL;
		}
		break;
	}
	case SEEK_SET:
		/* Start marker must be non-negative. */
		if (fl->l_start < 0) {
			return EINVAL;
		}
		/* Positive length: end marker must not exceed LLONG_MAX. */
		if ((fl->l_len > 0) &&
		    CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* Negative length: end marker must not go negative. */
		if ((fl->l_len < 0) && (fl->l_start + fl->l_len < 0)) {
			return EINVAL;
		}
		break;
	default:
		/* Other whence values are validated by the callers. */
		break;
	}
	return 0;
}
2359
2360
2361 /*
2362 * sys_dup
2363 *
2364 * Description: Duplicate a file descriptor.
2365 *
2366 * Parameters: p Process performing the dup
2367 * uap->fd The fd to dup
2368 * retval Pointer to the call return area
2369 *
2370 * Returns: 0 Success
2371 * !0 Errno
2372 *
2373 * Implicit returns:
2374 * *retval (modified) The new descriptor
2375 */
int
sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
{
	struct filedesc *fdp = &p->p_fd;
	int old = uap->fd;
	int new, error;
	struct fileproc *fp;

	/* The fdlock is held across lookup, allocation, and finishdup. */
	proc_fdlock(p);
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors may forbid duplication entirely. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* Reserve the lowest available descriptor slot. */
	if ((error = fdalloc(p, 0, &new))) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* finishdup() installs the new fd and sets *retval on success. */
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	/* Energy accounting for duplicated sockets (tracing builds only). */
	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
	}

	return error;
}
2411
2412 /*
2413 * sys_dup2
2414 *
2415 * Description: Duplicate a file descriptor to a particular value.
2416 *
2417 * Parameters: p Process performing the dup
2418 * uap->from The fd to dup
2419 * uap->to The fd to dup it to
2420 * retval Pointer to the call return area
2421 *
2422 * Returns: 0 Success
2423 * !0 Errno
2424 *
2425 * Implicit returns:
2426 * *retval (modified) The new descriptor
2427 */
2428 int
sys_dup2(proc_t p,struct dup2_args * uap,int32_t * retval)2429 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2430 {
2431 return dup2(p, uap->from, uap->to, retval);
2432 }
2433
int
dup2(proc_t p, int old, int new, int *retval)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;

	proc_fdlock(p);

startover:
	/* Take an iocount on the source descriptor (fdlock held). */
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors may forbid duplication. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* Target must lie within the process's RLIMIT_NOFILE range. */
	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* POSIX: dup2(fd, fd) is a no-op that returns fd. */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* Target is beyond the current table: grow/reserve via fdalloc. */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		/*
		 * fdalloc() gave us a different slot than requested: release
		 * it and fall into the close-and-reserve path for 'new'.
		 */
		if (new != i) {
			fdrelse(p, i);
			goto closeit;
		}
	} else {
closeit:
		/*
		 * Another thread holds 'new' reserved (e.g. a concurrent
		 * open/dup2).  Drop our iocount, wait for the slot, and
		 * restart from scratch since the table may have changed.
		 */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* Closing a guarded descriptor raises an exception instead. */
			if (fp_isguarded(nfp, GUARD_CLOSE)) {
				fp_drop(p, old, fp, 1);
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/*
			 * Close the existing file at 'new'; FD_DUP2RESV keeps
			 * the slot reserved for us while the lock is dropped.
			 */
			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			/* Empty slot: just mark it reserved for finishdup(). */
			procfdtbl_reservefd(p, new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	/* Install the duplicate into the reserved slot and set *retval. */
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
2520
2521
2522 /*
2523 * fcntl
2524 *
2525 * Description: The file control system call.
2526 *
2527 * Parameters: p Process performing the fcntl
2528 * uap->fd The fd to operate against
2529 * uap->cmd The command to perform
2530 * uap->arg Pointer to the command argument
2531 * retval Pointer to the call return area
2532 *
2533 * Returns: 0 Success
2534 * !0 Errno (see fcntl_nocancel)
2535 *
2536 * Implicit returns:
2537 * *retval (modified) fcntl return value (if any)
2538 *
2539 * Notes: This system call differs from fcntl_nocancel() in that it
2540 * tests for cancellation prior to performing a potentially
2541 * blocking operation.
2542 */
2543 int
sys_fcntl(proc_t p,struct fcntl_args * uap,int32_t * retval)2544 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
2545 {
2546 __pthread_testcancel(1);
2547 return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
2548 }
2549
2550 #define ACCOUNT_OPENFROM_ENTITLEMENT \
2551 "com.apple.private.vfs.role-account-openfrom"
2552
2553 /*
2554 * sys_fcntl_nocancel
2555 *
2556 * Description: A non-cancel-testing file control system call.
2557 *
2558 * Parameters: p Process performing the fcntl
2559 * uap->fd The fd to operate against
2560 * uap->cmd The command to perform
2561 * uap->arg Pointer to the command argument
2562 * retval Pointer to the call return area
2563 *
2564 * Returns: 0 Success
2565 * EINVAL
2566 * fp_lookup:EBADF Bad file descriptor
2567 * [F_DUPFD]
2568 * fdalloc:EMFILE
2569 * fdalloc:ENOMEM
2570 * finishdup:EBADF
2571 * finishdup:ENOMEM
2572 * [F_SETOWN]
2573 * ESRCH
2574 * [F_SETLK]
2575 * EBADF
2576 * EOVERFLOW
2577 * copyin:EFAULT
2578 * vnode_getwithref:???
2579 * VNOP_ADVLOCK:???
2580 * msleep:ETIMEDOUT
2581 * [F_GETLK]
2582 * EBADF
2583 * EOVERFLOW
2584 * copyin:EFAULT
2585 * copyout:EFAULT
2586 * vnode_getwithref:???
2587 * VNOP_ADVLOCK:???
2588 * [F_PREALLOCATE]
2589 * EBADF
2590 * EINVAL
2591 * copyin:EFAULT
2592 * copyout:EFAULT
2593 * vnode_getwithref:???
2594 * VNOP_ALLOCATE:???
2595 * [F_SETSIZE,F_RDADVISE]
2596 * EBADF
2597 * EINVAL
2598 * copyin:EFAULT
2599 * vnode_getwithref:???
2600 * [F_RDAHEAD,F_NOCACHE]
2601 * EBADF
2602 * vnode_getwithref:???
2603 * [???]
2604 *
2605 * Implicit returns:
2606 * *retval (modified) fcntl return value (if any)
2607 */
2608 #define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
2609 struct vfs_context context = { \
2610 .vc_thread = current_thread(), \
2611 .vc_ucred = fp->f_cred, \
2612 }
2613
2614 static user_addr_t
sys_fnctl_parse_arg(proc_t p,user_long_t arg)2615 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2616 {
2617 /*
2618 * Since the arg parameter is defined as a long but may be
2619 * either a long or a pointer we must take care to handle
2620 * sign extension issues. Our sys call munger will sign
2621 * extend a long when we are called from a 32-bit process.
2622 * Since we can never have an address greater than 32-bits
2623 * from a 32-bit process we lop off the top 32-bits to avoid
2624 * getting the wrong address
2625 */
2626 return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2627 }
2628
/* cleanup code common to fcntl functions, for when the fdlock is still held */
static int
sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
{
	/*
	 * Release the iocount taken by fp_lookup() (locked variant — the
	 * caller still holds the proc fdlock), then drop the fdlock and
	 * pass the caller's error code through unchanged.
	 */
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
2637
/* cleanup code common to fcntl commands acting on vnodes, once they have unlocked the fdlock */
static int
sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
{
#pragma unused(vp)

	/* Record the vnode's path for the audit trail before letting go. */
	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	/* Unlocked fp_drop: the fdlock was already released by the caller. */
	fp_drop(p, fd, fp, 0);
	return error;
}
2648
2649 typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
2650 struct fileproc *fp, int32_t *retval);
2651
2652 typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
2653 user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2654
2655 /*
2656 * SPI (private) for opening a file starting from a dir fd
2657 *
2658 * Note: do not inline to keep stack usage under control.
2659 */
__attribute__((noinline))
static int
sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
    struct fileproc *fp, struct vnode *vp, int32_t *retval)
{
#pragma unused(cmd)

	/*
	 * F_OPENFROM: open a file by a path relative to the directory
	 * referenced by 'fd'.  Entered with the proc fdlock held and an
	 * iocount on fp; exits via sys_fcntl_out()/sys_fcntl_outdrop().
	 */
	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
	struct user_fopenfrom fopen;
	struct vnode_attr *va;
	struct nameidata *nd;
	int error, cmode;
	bool has_entitlement;

	/* Check if this isn't a valid file descriptor */
	if ((fp->f_flag & FREAD) == 0) {
		return sys_fcntl_out(p, fd, fp, EBADF);
	}
	proc_fdunlock(p);

	/* Take an iocount on the vnode; released before returning. */
	if (vnode_getwithref(vp)) {
		error = ENOENT;
		goto outdrop;
	}

	/* Only valid for directories */
	if (vp->v_type != VDIR) {
		vnode_put(vp);
		error = ENOTDIR;
		goto outdrop;
	}

	/*
	 * Only entitled apps may use the credentials of the thread
	 * that opened the file descriptor.
	 * Non-entitled threads will use their own context.
	 */
	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);

	/* Get flags, mode and pathname arguments. */
	if (IS_64BIT_PROCESS(p)) {
		error = copyin(argp, &fopen, sizeof(fopen));
	} else {
		/* 32-bit caller: copy the narrow layout and widen the pointer. */
		struct user32_fopenfrom fopen32;

		error = copyin(argp, &fopen32, sizeof(fopen32));
		fopen.o_flags = fopen32.o_flags;
		fopen.o_mode = fopen32.o_mode;
		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
	}
	if (error) {
		vnode_put(vp);
		goto outdrop;
	}

	/* open1() can have really deep stacks, so allocate those */
	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	AUDIT_ARG(fflags, fopen.o_flags);
	AUDIT_ARG(mode, fopen.o_mode);
	VATTR_INIT(va);
	/* Mask off all but regular access permissions */
	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);

	/* Context using the fileglob's credentials (entitled callers only). */
	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);

	/* Start the lookup relative to the file descriptor's vnode. */
	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
	nd->ni_dvp = vp;

	error = open1(has_entitlement ? &context : vfs_context_current(),
	    nd, fopen.o_flags, va, NULL, NULL, retval);

	kfree_type(struct vnode_attr, va);
	kfree_type(struct nameidata, nd);

	vnode_put(vp);

outdrop:
	return sys_fcntl_outdrop(p, fd, fp, vp, error);
}
2744
2745 int
sys_fcntl_nocancel(proc_t p,struct fcntl_nocancel_args * uap,int32_t * retval)2746 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2747 {
2748 int fd = uap->fd;
2749 int cmd = uap->cmd;
2750 struct filedesc *fdp = &p->p_fd;
2751 struct fileproc *fp;
2752 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
2753 unsigned int oflags, nflags;
2754 int i, tmp, error, error2, flg = 0;
2755 struct flock fl = {};
2756 struct flocktimeout fltimeout;
2757 struct timespec *timeout = NULL;
2758 off_t offset;
2759 int newmin;
2760 daddr64_t lbn, bn;
2761 unsigned int fflag;
2762 user_addr_t argp;
2763 boolean_t is64bit;
2764 int has_entitlement = 0;
2765
2766 AUDIT_ARG(fd, uap->fd);
2767 AUDIT_ARG(cmd, uap->cmd);
2768
2769 proc_fdlock(p);
2770 if ((error = fp_lookup(p, fd, &fp, 1))) {
2771 proc_fdunlock(p);
2772 return error;
2773 }
2774
2775 SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2776
2777 is64bit = proc_is64bit(p);
2778 if (is64bit) {
2779 argp = uap->arg;
2780 } else {
2781 /*
2782 * Since the arg parameter is defined as a long but may be
2783 * either a long or a pointer we must take care to handle
2784 * sign extension issues. Our sys call munger will sign
2785 * extend a long when we are called from a 32-bit process.
2786 * Since we can never have an address greater than 32-bits
2787 * from a 32-bit process we lop off the top 32-bits to avoid
2788 * getting the wrong address
2789 */
2790 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2791 }
2792
2793 #if CONFIG_MACF
2794 error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2795 if (error) {
2796 goto out;
2797 }
2798 #endif
2799
2800 switch (cmd) {
2801 case F_DUPFD:
2802 case F_DUPFD_CLOEXEC:
2803 if (fp_isguarded(fp, GUARD_DUP)) {
2804 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2805 goto out;
2806 }
2807 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2808 AUDIT_ARG(value32, newmin);
2809 if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2810 error = EINVAL;
2811 goto out;
2812 }
2813 if ((error = fdalloc(p, newmin, &i))) {
2814 goto out;
2815 }
2816 error = finishdup(p, fdp, fd, i,
2817 cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2818 goto out;
2819
2820 case F_GETFD:
2821 *retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2822 error = 0;
2823 goto out;
2824
2825 case F_SETFD:
2826 AUDIT_ARG(value32, (uint32_t)uap->arg);
2827 if (uap->arg & FD_CLOEXEC) {
2828 fp->fp_flags |= FP_CLOEXEC;
2829 error = 0;
2830 } else if (!fp->fp_guard_attrs) {
2831 fp->fp_flags &= ~FP_CLOEXEC;
2832 error = 0;
2833 } else {
2834 error = fp_guard_exception(p,
2835 fd, fp, kGUARD_EXC_NOCLOEXEC);
2836 }
2837 goto out;
2838
2839 case F_GETFL:
2840 *retval = OFLAGS(fp->f_flag);
2841 error = 0;
2842 goto out;
2843
2844 case F_SETFL:
2845 // FIXME (rdar://54898652)
2846 //
2847 // this code is broken if fnctl(F_SETFL), ioctl() are
2848 // called concurrently for the same fileglob.
2849
2850 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2851 AUDIT_ARG(value32, tmp);
2852
2853 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2854 nflags = oflags & ~FCNTLFLAGS;
2855 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2856 });
2857 tmp = nflags & FNONBLOCK;
2858 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2859 if (error) {
2860 goto out;
2861 }
2862 tmp = nflags & FASYNC;
2863 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2864 if (!error) {
2865 goto out;
2866 }
2867 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2868 tmp = 0;
2869 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2870 goto out;
2871
2872 case F_GETOWN:
2873 if (fp->f_type == DTYPE_SOCKET) {
2874 *retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2875 error = 0;
2876 goto out;
2877 }
2878 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2879 *retval = -*retval;
2880 goto out;
2881
2882 case F_SETOWN:
2883 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2884 AUDIT_ARG(value32, tmp);
2885 if (fp->f_type == DTYPE_SOCKET) {
2886 ((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2887 error = 0;
2888 goto out;
2889 }
2890 if (fp->f_type == DTYPE_PIPE) {
2891 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2892 goto out;
2893 }
2894
2895 if (tmp <= 0) {
2896 tmp = -tmp;
2897 } else {
2898 proc_t p1 = proc_find(tmp);
2899 if (p1 == 0) {
2900 error = ESRCH;
2901 goto out;
2902 }
2903 tmp = (int)p1->p_pgrpid;
2904 proc_rele(p1);
2905 }
2906 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2907 goto out;
2908
2909 case F_SETNOSIGPIPE:
2910 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2911 if (fp->f_type == DTYPE_SOCKET) {
2912 #if SOCKETS
2913 error = sock_setsockopt((struct socket *)fp_get_data(fp),
2914 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2915 #else
2916 error = EINVAL;
2917 #endif
2918 } else {
2919 struct fileglob *fg = fp->fp_glob;
2920
2921 lck_mtx_lock_spin(&fg->fg_lock);
2922 if (tmp) {
2923 fg->fg_lflags |= FG_NOSIGPIPE;
2924 } else {
2925 fg->fg_lflags &= ~FG_NOSIGPIPE;
2926 }
2927 lck_mtx_unlock(&fg->fg_lock);
2928 error = 0;
2929 }
2930 goto out;
2931
2932 case F_GETNOSIGPIPE:
2933 if (fp->f_type == DTYPE_SOCKET) {
2934 #if SOCKETS
2935 int retsize = sizeof(*retval);
2936 error = sock_getsockopt((struct socket *)fp_get_data(fp),
2937 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2938 #else
2939 error = EINVAL;
2940 #endif
2941 } else {
2942 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2943 1 : 0;
2944 error = 0;
2945 }
2946 goto out;
2947
2948 case F_SETCONFINED:
2949 /*
2950 * If this is the only reference to this fglob in the process
2951 * and it's already marked as close-on-fork then mark it as
2952 * (immutably) "confined" i.e. any fd that points to it will
2953 * forever be close-on-fork, and attempts to use an IPC
2954 * mechanism to move the descriptor elsewhere will fail.
2955 */
2956 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2957 struct fileglob *fg = fp->fp_glob;
2958
2959 lck_mtx_lock_spin(&fg->fg_lock);
2960 if (fg->fg_lflags & FG_CONFINED) {
2961 error = 0;
2962 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2963 error = EAGAIN; /* go close the dup .. */
2964 } else if (fp->fp_flags & FP_CLOFORK) {
2965 fg->fg_lflags |= FG_CONFINED;
2966 error = 0;
2967 } else {
2968 error = EBADF; /* open without O_CLOFORK? */
2969 }
2970 lck_mtx_unlock(&fg->fg_lock);
2971 } else {
2972 /*
2973 * Other subsystems may have built on the immutability
2974 * of FG_CONFINED; clearing it may be tricky.
2975 */
2976 error = EPERM; /* immutable */
2977 }
2978 goto out;
2979
2980 case F_GETCONFINED:
2981 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2982 error = 0;
2983 goto out;
2984
2985 case F_SETLKWTIMEOUT:
2986 case F_SETLKW:
2987 case F_OFD_SETLKWTIMEOUT:
2988 case F_OFD_SETLKW:
2989 flg |= F_WAIT;
2990 OS_FALLTHROUGH;
2991
2992 case F_SETLK:
2993 case F_OFD_SETLK:
2994 if (fp->f_type != DTYPE_VNODE) {
2995 error = EBADF;
2996 goto out;
2997 }
2998 vp = (struct vnode *)fp_get_data(fp);
2999
3000 fflag = fp->f_flag;
3001 offset = fp->f_offset;
3002 proc_fdunlock(p);
3003
3004 /* Copy in the lock structure */
3005 if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3006 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3007 if (error) {
3008 goto outdrop;
3009 }
3010 fl = fltimeout.fl;
3011 timeout = &fltimeout.timeout;
3012 } else {
3013 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3014 if (error) {
3015 goto outdrop;
3016 }
3017 }
3018
3019 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3020 /* and ending byte for EOVERFLOW in SEEK_SET */
3021 error = check_file_seek_range(&fl, offset);
3022 if (error) {
3023 goto outdrop;
3024 }
3025
3026 if ((error = vnode_getwithref(vp))) {
3027 goto outdrop;
3028 }
3029 if (fl.l_whence == SEEK_CUR) {
3030 fl.l_start += offset;
3031 }
3032
3033 #if CONFIG_MACF
3034 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3035 F_SETLK, &fl);
3036 if (error) {
3037 (void)vnode_put(vp);
3038 goto outdrop;
3039 }
3040 #endif
3041 switch (cmd) {
3042 case F_OFD_SETLK:
3043 case F_OFD_SETLKW:
3044 case F_OFD_SETLKWTIMEOUT:
3045 flg |= F_OFD_LOCK;
3046 switch (fl.l_type) {
3047 case F_RDLCK:
3048 if ((fflag & FREAD) == 0) {
3049 error = EBADF;
3050 break;
3051 }
3052 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3053 F_SETLK, &fl, flg, &context, timeout);
3054 break;
3055 case F_WRLCK:
3056 if ((fflag & FWRITE) == 0) {
3057 error = EBADF;
3058 break;
3059 }
3060 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3061 F_SETLK, &fl, flg, &context, timeout);
3062 break;
3063 case F_UNLCK:
3064 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3065 F_UNLCK, &fl, F_OFD_LOCK, &context,
3066 timeout);
3067 break;
3068 default:
3069 error = EINVAL;
3070 break;
3071 }
3072 if (0 == error &&
3073 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3074 struct fileglob *fg = fp->fp_glob;
3075
3076 /*
3077 * arrange F_UNLCK on last close (once
3078 * set, FG_HAS_OFDLOCK is immutable)
3079 */
3080 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3081 lck_mtx_lock_spin(&fg->fg_lock);
3082 fg->fg_lflags |= FG_HAS_OFDLOCK;
3083 lck_mtx_unlock(&fg->fg_lock);
3084 }
3085 }
3086 break;
3087 default:
3088 flg |= F_POSIX;
3089 switch (fl.l_type) {
3090 case F_RDLCK:
3091 if ((fflag & FREAD) == 0) {
3092 error = EBADF;
3093 break;
3094 }
3095 // XXX UInt32 unsafe for LP64 kernel
3096 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3097 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3098 F_SETLK, &fl, flg, &context, timeout);
3099 break;
3100 case F_WRLCK:
3101 if ((fflag & FWRITE) == 0) {
3102 error = EBADF;
3103 break;
3104 }
3105 // XXX UInt32 unsafe for LP64 kernel
3106 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3107 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3108 F_SETLK, &fl, flg, &context, timeout);
3109 break;
3110 case F_UNLCK:
3111 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3112 F_UNLCK, &fl, F_POSIX, &context, timeout);
3113 break;
3114 default:
3115 error = EINVAL;
3116 break;
3117 }
3118 break;
3119 }
3120 (void) vnode_put(vp);
3121 goto outdrop;
3122
3123 case F_GETLK:
3124 case F_OFD_GETLK:
3125 case F_GETLKPID:
3126 case F_OFD_GETLKPID:
3127 if (fp->f_type != DTYPE_VNODE) {
3128 error = EBADF;
3129 goto out;
3130 }
3131 vp = (struct vnode *)fp_get_data(fp);
3132
3133 offset = fp->f_offset;
3134 proc_fdunlock(p);
3135
3136 /* Copy in the lock structure */
3137 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3138 if (error) {
3139 goto outdrop;
3140 }
3141
3142 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3143 /* and ending byte for EOVERFLOW in SEEK_SET */
3144 error = check_file_seek_range(&fl, offset);
3145 if (error) {
3146 goto outdrop;
3147 }
3148
3149 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3150 error = EINVAL;
3151 goto outdrop;
3152 }
3153
3154 switch (fl.l_type) {
3155 case F_RDLCK:
3156 case F_UNLCK:
3157 case F_WRLCK:
3158 break;
3159 default:
3160 error = EINVAL;
3161 goto outdrop;
3162 }
3163
3164 switch (fl.l_whence) {
3165 case SEEK_CUR:
3166 case SEEK_SET:
3167 case SEEK_END:
3168 break;
3169 default:
3170 error = EINVAL;
3171 goto outdrop;
3172 }
3173
3174 if ((error = vnode_getwithref(vp)) == 0) {
3175 if (fl.l_whence == SEEK_CUR) {
3176 fl.l_start += offset;
3177 }
3178
3179 #if CONFIG_MACF
3180 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3181 cmd, &fl);
3182 if (error == 0)
3183 #endif
3184 switch (cmd) {
3185 case F_OFD_GETLK:
3186 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3187 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3188 break;
3189 case F_OFD_GETLKPID:
3190 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3191 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3192 break;
3193 default:
3194 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3195 cmd, &fl, F_POSIX, &context, NULL);
3196 break;
3197 }
3198
3199 (void)vnode_put(vp);
3200
3201 if (error == 0) {
3202 error = copyout((caddr_t)&fl, argp, sizeof(fl));
3203 }
3204 }
3205 goto outdrop;
3206
3207 case F_PREALLOCATE: {
3208 fstore_t alloc_struct; /* structure for allocate command */
3209 u_int32_t alloc_flags = 0;
3210
3211 if (fp->f_type != DTYPE_VNODE) {
3212 error = EBADF;
3213 goto out;
3214 }
3215
3216 vp = (struct vnode *)fp_get_data(fp);
3217 proc_fdunlock(p);
3218
3219 /* make sure that we have write permission */
3220 if ((fp->f_flag & FWRITE) == 0) {
3221 error = EBADF;
3222 goto outdrop;
3223 }
3224
3225 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3226 if (error) {
3227 goto outdrop;
3228 }
3229
3230 /* now set the space allocated to 0 */
3231 alloc_struct.fst_bytesalloc = 0;
3232
3233 /*
3234 * Do some simple parameter checking
3235 */
3236
3237 /* set up the flags */
3238
3239 alloc_flags |= PREALLOCATE;
3240
3241 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3242 alloc_flags |= ALLOCATECONTIG;
3243 }
3244
3245 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3246 alloc_flags |= ALLOCATEALL;
3247 }
3248
3249 /*
3250 * Do any position mode specific stuff. The only
3251 * position mode supported now is PEOFPOSMODE
3252 */
3253
3254 switch (alloc_struct.fst_posmode) {
3255 case F_PEOFPOSMODE:
3256 if (alloc_struct.fst_offset != 0) {
3257 error = EINVAL;
3258 goto outdrop;
3259 }
3260
3261 alloc_flags |= ALLOCATEFROMPEOF;
3262 break;
3263
3264 case F_VOLPOSMODE:
3265 if (alloc_struct.fst_offset <= 0) {
3266 error = EINVAL;
3267 goto outdrop;
3268 }
3269
3270 alloc_flags |= ALLOCATEFROMVOL;
3271 break;
3272
3273 default: {
3274 error = EINVAL;
3275 goto outdrop;
3276 }
3277 }
3278 if ((error = vnode_getwithref(vp)) == 0) {
3279 /*
3280 * call allocate to get the space
3281 */
3282 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3283 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3284 &context);
3285 (void)vnode_put(vp);
3286
3287 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3288
3289 if (error == 0) {
3290 error = error2;
3291 }
3292 }
3293 goto outdrop;
3294 }
3295 case F_PUNCHHOLE: {
3296 fpunchhole_t args;
3297
3298 if (fp->f_type != DTYPE_VNODE) {
3299 error = EBADF;
3300 goto out;
3301 }
3302
3303 vp = (struct vnode *)fp_get_data(fp);
3304 proc_fdunlock(p);
3305
3306 /* need write permissions */
3307 if ((fp->f_flag & FWRITE) == 0) {
3308 error = EPERM;
3309 goto outdrop;
3310 }
3311
3312 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3313 goto outdrop;
3314 }
3315
3316 if ((error = vnode_getwithref(vp))) {
3317 goto outdrop;
3318 }
3319
3320 #if CONFIG_MACF
3321 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
3322 (void)vnode_put(vp);
3323 goto outdrop;
3324 }
3325 #endif
3326
3327 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
3328 (void)vnode_put(vp);
3329
3330 goto outdrop;
3331 }
3332 case F_TRIM_ACTIVE_FILE: {
3333 ftrimactivefile_t args;
3334
3335 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
3336 error = EACCES;
3337 goto out;
3338 }
3339
3340 if (fp->f_type != DTYPE_VNODE) {
3341 error = EBADF;
3342 goto out;
3343 }
3344
3345 vp = (struct vnode *)fp_get_data(fp);
3346 proc_fdunlock(p);
3347
3348 /* need write permissions */
3349 if ((fp->f_flag & FWRITE) == 0) {
3350 error = EPERM;
3351 goto outdrop;
3352 }
3353
3354 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3355 goto outdrop;
3356 }
3357
3358 if ((error = vnode_getwithref(vp))) {
3359 goto outdrop;
3360 }
3361
3362 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
3363 (void)vnode_put(vp);
3364
3365 goto outdrop;
3366 }
3367 case F_SPECULATIVE_READ: {
3368 fspecread_t args;
3369 off_t temp_length = 0;
3370
3371 if (fp->f_type != DTYPE_VNODE) {
3372 error = EBADF;
3373 goto out;
3374 }
3375
3376 vp = (struct vnode *)fp_get_data(fp);
3377 proc_fdunlock(p);
3378
3379 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3380 goto outdrop;
3381 }
3382
3383 /* Discard invalid offsets or lengths */
3384 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3385 error = EINVAL;
3386 goto outdrop;
3387 }
3388
3389 /*
3390 * Round the file offset down to a page-size boundary (or to 0).
3391 * The filesystem will need to round the length up to the end of the page boundary
3392 * or to the EOF of the file.
3393 */
3394 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3395 uint64_t foff_delta = args.fsr_offset - foff;
3396 args.fsr_offset = (off_t) foff;
3397
3398 /*
3399 * Now add in the delta to the supplied length. Since we may have adjusted the
3400 * offset, increase it by the amount that we adjusted.
3401 */
3402 if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3403 error = EOVERFLOW;
3404 goto outdrop;
3405 }
3406
3407 /*
3408 * Make sure (fsr_offset + fsr_length) does not overflow.
3409 */
3410 if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3411 error = EOVERFLOW;
3412 goto outdrop;
3413 }
3414
3415 if ((error = vnode_getwithref(vp))) {
3416 goto outdrop;
3417 }
3418 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
3419 (void)vnode_put(vp);
3420
3421 goto outdrop;
3422 }
	/*
	 * F_SETSIZE: set the file's size, optionally without zero-filling the
	 * newly exposed range.  Holders of PRIV_VFS_SETSIZE may skip zero-fill
	 * (IO_NOZEROFILL); non-superuser callers are rejected outright.
	 */
	case F_SETSIZE:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		/* The new size is passed by pointer as an off_t. */
		error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
		if (error) {
			goto outdrop;
		}
		AUDIT_ARG(value64, offset);

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

#if CONFIG_MACF
		/* MAC check uses the credential the file was opened with. */
		error = mac_vnode_check_truncate(&context,
		    fp->fp_glob->fg_cred, vp);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
#endif
		/*
		 * Make sure that we are root. Growing a file
		 * without zero filling the data is a security hole.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			error = EACCES;
		} else {
			/*
			 * Require privilege to change file size without zerofill,
			 * else will change the file size and zerofill it.
			 */
			error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
			if (error == 0) {
				error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
			} else {
				/* Unprivileged: resize with normal zero-fill semantics. */
				error = vnode_setsize(vp, offset, 0, &context);
			}

#if CONFIG_MACF
			if (error == 0) {
				mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
			}
#endif
		}

		(void)vnode_put(vp);
		goto outdrop;

	/*
	 * F_RDAHEAD / F_NOCACHE / F_NODIRECT / F_SINGLE_WRITER:
	 * toggle per-open-file flags on the shared fileglob, atomically since
	 * fg_flag may be accessed without the fd lock elsewhere.  arg != 0
	 * enables the named behavior; note F_RDAHEAD stores an inverse flag
	 * (FNORDAHEAD), so enabling read-ahead means clearing the bit.
	 */
	case F_RDAHEAD:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
		} else {
			os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
		}
		goto out;

	case F_NOCACHE:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
		}
		goto out;

	case F_NODIRECT:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
		}
		goto out;

	case F_SINGLE_WRITER:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
		}
		goto out;

3525
	/*
	 * F_GLOBAL_NOCACHE: set/clear the vnode-wide (all opens) no-cache
	 * state; the previous state is returned in *retval.
	 */
	case F_GLOBAL_NOCACHE:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			/* Report the prior state before changing it. */
			*retval = vnode_isnocache(vp);

			if (uap->arg) {
				vnode_setnocache(vp);
			} else {
				vnode_clearnocache(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;

	/*
	 * F_CHECK_OPENEVT: set/clear the "check open event" flag on the
	 * vnode; the previous state is returned in *retval.
	 */
	case F_CHECK_OPENEVT:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			*retval = vnode_is_openevt(vp);

			if (uap->arg) {
				vnode_set_openevt(vp);
			} else {
				vnode_clear_openevt(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;

3567
	/*
	 * F_RDADVISE: issue a read-ahead advisory for the range described by
	 * the user's struct radvisory; forwarded to the filesystem via
	 * VNOP_IOCTL.
	 */
	case F_RDADVISE: {
		struct radvisory ra_struct;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
			goto outdrop;
		}
		/* Reject negative offsets/counts before handing to the FS. */
		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
			error = EINVAL;
			goto outdrop;
		}
		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;
	}

3592
	/*
	 * F_FLUSH_DATA: asynchronously push the file's dirty data to disk
	 * (MNT_NOWAIT fsync; no journal/barrier semantics).
	 */
	case F_FLUSH_DATA:

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;

3608
	/*
	 * F_LOG2PHYS / F_LOG2PHYS_EXT: translate a logical file offset to a
	 * physical device offset.  The _EXT variant takes the file offset
	 * (and a contiguous-bytes request) from the user structure; the
	 * legacy variant uses the descriptor's current offset.
	 */
	case F_LOG2PHYS:
	case F_LOG2PHYS_EXT: {
		struct log2phys l2p_struct = {}; /* structure for allocate command */
		int devBlockSize;

		off_t file_offset = 0;
		size_t a_size = 0;
		size_t run = 0;

		if (cmd == F_LOG2PHYS_EXT) {
			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
			if (error) {
				goto out;
			}
			file_offset = l2p_struct.l2p_devoffset;
		} else {
			file_offset = fp->f_offset;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);
		if ((error = vnode_getwithref(vp))) {
			goto outdrop;
		}
		/* Round the file offset to its logical block, then back to an offset. */
		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		error = VNOP_BLKTOOFF(vp, lbn, &offset);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		devBlockSize = vfs_devblocksize(vnode_mount(vp));
		if (cmd == F_LOG2PHYS_EXT) {
			if (l2p_struct.l2p_contigbytes < 0) {
				vnode_put(vp);
				error = EINVAL;
				goto outdrop;
			}

			/* Clamp the (non-negative) request to what size_t can hold. */
			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
		} else {
			a_size = devBlockSize;
		}

		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);

		(void)vnode_put(vp);

		if (!error) {
			l2p_struct.l2p_flags = 0; /* for now */
			if (cmd == F_LOG2PHYS_EXT) {
				/* Report contiguity from the requested offset, not the block start. */
				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
			} else {
				l2p_struct.l2p_contigbytes = 0; /* for now */
			}

			/*
			 * The block number being -1 suggests that the file offset is not backed
			 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
			 */
			if (bn == -1) {
				/* Don't multiply it by the block size */
				l2p_struct.l2p_devoffset = bn;
			} else {
				l2p_struct.l2p_devoffset = bn * devBlockSize;
				l2p_struct.l2p_devoffset += file_offset - offset;
			}
			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
		}
		goto outdrop;
	}
	/*
	 * F_GETPATH / F_GETPATH_NOFIRMLINK: copy the vnode's path (up to
	 * MAXPATHLEN, NUL-terminated) out to the user buffer.  The
	 * NOFIRMLINK variant resolves past firmlinks.
	 */
	case F_GETPATH:
	case F_GETPATH_NOFIRMLINK: {
		char *pathbufp;
		int pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);

		if ((error = vnode_getwithref(vp)) == 0) {
			if (cmd == F_GETPATH_NOFIRMLINK) {
				error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
			} else {
				error = vn_getpath(vp, pathbufp, &pathlen);
			}
			(void)vnode_put(vp);

			if (error == 0) {
				/* pathlen was updated to the actual length including NUL. */
				error = copyout((caddr_t)pathbufp, argp, pathlen);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}

3716
	/*
	 * F_PATHPKG_CHECK: given a user-supplied path string, ask the
	 * filesystem whether this vnode lies inside that "package"
	 * directory; result is returned via *retval.
	 */
	case F_PATHPKG_CHECK: {
		char *pathbufp;
		size_t pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);

		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
			if ((error = vnode_getwithref(vp)) == 0) {
				AUDIT_ARG(text, pathbufp);
				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);

				(void)vnode_put(vp);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}

3742
	/*
	 * Pass-through ioctls: each of these is forwarded verbatim to the
	 * filesystem with a NULL argument; the FS implements the semantics.
	 */
	case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
	case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
	case F_BARRIERFSYNC: // fsync + barrier
	case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
	case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);

			(void)vnode_put(vp);
		}
		break;
	}

3762
3763 /*
3764 * SPI (private) for opening a file starting from a dir fd
3765 */
3766 case F_OPENFROM: {
3767 /* Check if this isn't a valid file descriptor */
3768 if (fp->f_type != DTYPE_VNODE) {
3769 error = EBADF;
3770 goto out;
3771 }
3772 vp = (struct vnode *)fp_get_data(fp);
3773
3774 return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
3775 }
3776
3777 /*
3778 * SPI (private) for unlinking a file starting from a dir fd
3779 */
3780 case F_UNLINKFROM: {
3781 user_addr_t pathname;
3782
3783 /* Check if this isn't a valid file descriptor */
3784 if ((fp->f_type != DTYPE_VNODE) ||
3785 (fp->f_flag & FREAD) == 0) {
3786 error = EBADF;
3787 goto out;
3788 }
3789 vp = (struct vnode *)fp_get_data(fp);
3790 proc_fdunlock(p);
3791
3792 if (vnode_getwithref(vp)) {
3793 error = ENOENT;
3794 goto outdrop;
3795 }
3796
3797 /* Only valid for directories */
3798 if (vp->v_type != VDIR) {
3799 vnode_put(vp);
3800 error = ENOTDIR;
3801 goto outdrop;
3802 }
3803
3804 /*
3805 * Only entitled apps may use the credentials of the thread
3806 * that opened the file descriptor.
3807 * Non-entitled threads will use their own context.
3808 */
3809 if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
3810 has_entitlement = 1;
3811 }
3812
3813 /* Get flags, mode and pathname arguments. */
3814 if (IS_64BIT_PROCESS(p)) {
3815 pathname = (user_addr_t)argp;
3816 } else {
3817 pathname = CAST_USER_ADDR_T(argp);
3818 }
3819
3820 /* Start the lookup relative to the file descriptor's vnode. */
3821 error = unlink1(has_entitlement ? &context : vfs_context_current(),
3822 vp, pathname, UIO_USERSPACE, 0);
3823
3824 vnode_put(vp);
3825 break;
3826 }
3827
	/*
	 * F_ADDSIGS and variants: attach a code-signature blob to this vnode.
	 * F_ADDSIGS copies the blob from user memory; the F_ADDFILESIGS*
	 * variants read it from the file itself at the supplied offset.
	 * _RETURN/_FOR_DYLD_SIM/_INFO additionally copy results back out.
	 */
	case F_ADDSIGS:
	case F_ADDFILESIGS:
	case F_ADDFILESIGS_FOR_DYLD_SIM:
	case F_ADDFILESIGS_RETURN:
	case F_ADDFILESIGS_INFO:
	{
		struct cs_blob *blob = NULL;
		struct user_fsignatures fs;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;
		int blob_add_flags = 0;
		/* Only F_ADDFILESIGS_INFO supplies the trailing output fields. */
		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
			/* Force CS_KILL for processes loading the dyld simulator. */
			if ((proc_getcsflags(p) & CS_KILL) == 0) {
				proc_lock(p);
				proc_csflags_set(p, CS_KILL);
				proc_unlock(p);
			}
		}

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &fs, sizeof_fs);
		} else {
			/* F_ADDFILESIGS_INFO is a 64-bit-only interface. */
			if (cmd == F_ADDFILESIGS_INFO) {
				error = EINVAL;
				vnode_put(vp);
				goto outdrop;
			}

			struct user32_fsignatures fs32;

			error = copyin(argp, &fs32, sizeof(fs32));
			fs.fs_file_start = fs32.fs_file_start;
			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
			fs.fs_blob_size = fs32.fs_blob_size;
		}

		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		/*
		 * First check if we have something loaded at this offset
		 */
		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
		if (blob != NULL) {
			/* If this is for dyld_sim revalidate the blob */
			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
				if (error) {
					blob = NULL;
					/* EAGAIN means: fall through and re-add the blob below. */
					if (error != EAGAIN) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		if (blob == NULL) {
			/*
			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
			 * our use cases for the immediate future, but note that at the time of this commit, some
			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
			 *
			 * We should consider how we can manage this more effectively; the above means that some
			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
			 * threshold considered ridiculous at the time of this change.
			 */
#define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
				error = E2BIG;
				vnode_put(vp);
				goto outdrop;
			}

			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
				error = ENOMEM;
				vnode_put(vp);
				goto outdrop;
			}

			if (cmd == F_ADDSIGS) {
				/* Blob lives in the caller's address space. */
				error = copyin(fs.fs_blob_start,
				    (void *) kernel_blob_addr,
				    fs.fs_blob_size);
			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
				int resid;

				/* Blob lives in the file itself; read it in. */
				error = vn_rdwr(UIO_READ,
				    vp,
				    (caddr_t) kernel_blob_addr,
				    (int)kernel_blob_size,
				    fs.fs_file_start + fs.fs_blob_start,
				    UIO_SYSSPACE,
				    0,
				    kauth_cred_get(),
				    &resid,
				    p);
				if ((error == 0) && resid) {
					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
				}
			}

			if (error) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
				vnode_put(vp);
				goto outdrop;
			}

			blob = NULL;
			error = ubc_cs_blob_add(vp,
			    proc_platform(p),
			    CPU_TYPE_ANY,                       /* not for a specific architecture */
			    CPU_SUBTYPE_ANY,
			    fs.fs_file_start,
			    &kernel_blob_addr,
			    kernel_blob_size,
			    NULL,
			    blob_add_flags,
			    &blob);

			/* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
			if (error) {
				if (kernel_blob_addr) {
					ubc_cs_blob_deallocate(kernel_blob_addr,
					    kernel_blob_size);
				}
				vnode_put(vp);
				goto outdrop;
			} else {
#if CHECK_CS_VALIDATION_BITMAP
				ubc_cs_validation_bitmap_allocate( vp );
#endif
			}
		}

		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
		    cmd == F_ADDFILESIGS_INFO) {
			/*
			 * The first element of the structure is an
			 * off_t that happens to have the same size for
			 * all archs. Lets overwrite that.
			 */
			off_t end_offset = 0;
			if (blob) {
				end_offset = blob->csb_end_offset;
			}
			error = copyout(&end_offset, argp, sizeof(end_offset));

			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
		}

		if (cmd == F_ADDFILESIGS_INFO) {
			/* Return information. What we copy out depends on the size of the
			 * passed in structure, to keep binary compatibility. */

			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
				// enough room for fs_cdhash[20]+fs_hash_type

				if (blob != NULL) {
					error = copyout(blob->csb_cdhash,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
					    USER_FSIGNATURES_CDHASH_LEN);
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
					int hashtype = cs_hash_type(blob->csb_hashtype);
					error = copyout(&hashtype,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
					    sizeof(int));
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		(void) vnode_put(vp);
		break;
	}
#if CONFIG_SUPPLEMENTAL_SIGNATURES
	/*
	 * F_ADDFILESUPPL: attach a supplemental code signature (read from
	 * this fd's file) to the "original" file named by fs_orig_fd.
	 * 64-bit processes only.  Two vnodes are in play: vp (this fd,
	 * holds the blob) and ivp (the original file being supplemented).
	 */
	case F_ADDFILESUPPL:
	{
		struct vnode *ivp;
		struct cs_blob *blob = NULL;
		struct user_fsupplement fs;
		int orig_fd;
		struct fileproc* orig_fp = NULL;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;

		if (!IS_64BIT_PROCESS(p)) {
			error = EINVAL;
			goto out; // drop fp and unlock fds
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		error = copyin(argp, &fs, sizeof(fs));
		if (error) {
			goto out;
		}

		/* Look up the original file while still holding the fd lock. */
		orig_fd = fs.fs_orig_fd;
		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
			goto out;
		}

		if (orig_fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			fp_drop(p, orig_fd, orig_fp, 1);
			goto out;
		}

		ivp = (struct vnode *)fp_get_data(orig_fp);

		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		error = vnode_getwithref(ivp);
		if (error) {
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop; //drop fp
		}

		error = vnode_getwithref(vp);
		if (error) {
			vnode_put(ivp);
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop;
		}

		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
			error = E2BIG;
			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
		}

		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
			goto dropboth;
		}

		/* Read the supplemental blob out of this fd's file. */
		int resid;
		error = vn_rdwr(UIO_READ, vp,
		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
		    fs.fs_file_start + fs.fs_blob_start,
		    UIO_SYSSPACE, 0,
		    kauth_cred_get(), &resid, p);
		if ((error == 0) && resid) {
			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
		}

		if (error) {
			ubc_cs_blob_deallocate(kernel_blob_addr,
			    kernel_blob_size);
			goto dropboth;
		}

		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
		    &kernel_blob_addr, kernel_blob_size, &blob);

		/* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
		if (error) {
			if (kernel_blob_addr) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
			}
			goto dropboth;
		}
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		break;

dropboth:
		/* Common error exit: release both iocounts and the second fd. */
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		goto outdrop;
	}
#endif
	/* Unimplemented commands. */
	case F_GETCODEDIR:
	case F_FINDSIGS: {
		error = ENOTSUP;
		goto out;
	}
	/*
	 * F_CHECK_LV: library-validation check for the file's fileglob,
	 * delegated to the MAC policy (no-op without CONFIG_MACF).
	 */
	case F_CHECK_LV: {
		struct fileglob *fg;
		fchecklv_t lv = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		fg = fp->fp_glob;
		proc_fdunlock(p);

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &lv, sizeof(lv));
		} else {
			/* Widen the 32-bit layout into the native structure. */
			struct user32_fchecklv lv32 = {};

			error = copyin(argp, &lv32, sizeof(lv32));
			lv.lv_file_start = lv32.lv_file_start;
			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
			lv.lv_error_message_size = lv32.lv_error_message_size;
		}
		if (error) {
			goto outdrop;
		}

#if CONFIG_MACF
		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
#endif

		break;
	}
	/*
	 * F_GETSIGSINFO: query properties of the code-signature blob loaded
	 * at the given file offset.  Currently only supports asking whether
	 * the signature is a platform binary.
	 */
	case F_GETSIGSINFO: {
		struct cs_blob *blob = NULL;
		fgetsigsinfo_t sigsinfo = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
		if (blob == NULL) {
			/* No signature loaded at that offset. */
			error = ENOENT;
			vnode_put(vp);
			goto outdrop;
		}
		switch (sigsinfo.fg_info_request) {
		case GETSIGSINFO_PLATFORM_BINARY:
			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
			/* Copy out only the answer field, in place in the user struct. */
			error = copyout(&sigsinfo.fg_sig_is_platform,
			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
			    sizeof(sigsinfo.fg_sig_is_platform));
			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
			break;
		default:
			error = EINVAL;
			vnode_put(vp);
			goto outdrop;
		}
		vnode_put(vp);
		break;
	}
#if CONFIG_PROTECT
	/*
	 * F_GETPROTECTIONCLASS: return the file's data-protection class
	 * via *retval, using the va_dataprotect_class vnode attribute.
	 */
	case F_GETPROTECTIONCLASS: {
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_dataprotect_class);
		error = VNOP_GETATTR(vp, &va, &context);
		if (!error) {
			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
				*retval = va.va_dataprotect_class;
			} else {
				/* Filesystem doesn't implement data protection. */
				error = ENOTSUP;
			}
		}

		vnode_put(vp);
		break;
	}

	/*
	 * F_SETPROTECTIONCLASS: set the file's data-protection class from
	 * arg.  Requires write access to the file.
	 */
	case F_SETPROTECTIONCLASS: {
		/* tmp must be a valid PROTECTION_CLASS_* */
		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_SET(&va, va_dataprotect_class, tmp);

		error = VNOP_SETATTR(vp, &va, ctx);

		vnode_put(vp);
		break;
	}

4297
	/*
	 * F_TRANSCODEKEY: have the filesystem transcode the file's wrapped
	 * content-protection key into a caller-visible form; the wrapped key
	 * bytes are copied out and the key length is returned in *retval.
	 */
	case F_TRANSCODEKEY: {
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		cp_key_t k = {
			.len = CP_MAX_WRAPPEDKEYSIZE,
		};

		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
		if (k.key == NULL) {
			error = ENOMEM;
		} else {
			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
		}

		vnode_put(vp);

		if (error == 0) {
			/* k.len was updated by the FS to the actual key length. */
			error = copyout(k.key, argp, k.len);
			*retval = k.len;
		}
		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);

		break;
	}

4333
	/*
	 * F_GETPROTECTIONLEVEL: return the content-protection version in
	 * use, via *retval, by forwarding the ioctl to the filesystem.
	 */
	case F_GETPROTECTIONLEVEL: {
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}

	/*
	 * F_GETDEFAULTPROTLEVEL: return the volume's default protection
	 * class, via *retval, by forwarding the ioctl to the filesystem.
	 */
	case F_GETDEFAULTPROTLEVEL: {
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * if cp_get_major_vers fails, error will be set to proper errno
		 * and cp_version will still be 0.
		 */

		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}

#endif /* CONFIG_PROTECT */
4380
	/*
	 * F_MOVEDATAEXTENTS: exchange the data extents of two files (this fd
	 * and the fd passed in arg) on the same HFS+/APFS volume.  Requires
	 * PRIV_VFS_MOVE_DATA_EXTENTS and write access to both files.
	 */
	case F_MOVEDATAEXTENTS: {
		struct fileproc *fp2 = NULL;
		struct vnode *src_vp = NULLVP;
		struct vnode *dst_vp = NULLVP;
		/* We need to grab the 2nd FD out of the arguments before moving on. */
		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);

		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
		if (error) {
			goto out;
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		/*
		 * For now, special case HFS+ and APFS only, since this
		 * is SPI.
		 */
		src_vp = (struct vnode *)fp_get_data(fp);
		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
			error = ENOTSUP;
			goto out;
		}

		/*
		 * Get the references before we start acquiring iocounts on the vnodes,
		 * while we still hold the proc fd lock
		 */
		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
			error = EBADF;
			goto out;
		}
		if (fp2->f_type != DTYPE_VNODE) {
			fp_drop(p, fd2, fp2, 1);
			error = EBADF;
			goto out;
		}
		dst_vp = (struct vnode *)fp_get_data(fp2);
		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
			fp_drop(p, fd2, fp2, 1);
			error = ENOTSUP;
			goto out;
		}

#if CONFIG_MACF
		/* Re-do MAC checks against the new FD, pass in a fake argument */
		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
		if (error) {
			fp_drop(p, fd2, fp2, 1);
			goto out;
		}
#endif
		/* Audit the 2nd FD */
		AUDIT_ARG(fd, fd2);

		proc_fdunlock(p);

		if (vnode_getwithref(src_vp)) {
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}
		if (vnode_getwithref(dst_vp)) {
			vnode_put(src_vp);
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * Basic asserts; validate they are not the same and that
		 * both live on the same filesystem.
		 */
		if (dst_vp == src_vp) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EINVAL;
			goto outdrop;
		}

		if (dst_vp->v_mount != src_vp->v_mount) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EXDEV;
			goto outdrop;
		}

		/* Now we have a legit pair of FDs. Go to work */

		/* Now check for write access to the target files */
		if (vnode_authorize(src_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		if (vnode_authorize(dst_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		/* Verify that both vps point to files and not directories */
		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
			error = EINVAL;
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			goto outdrop;
		}

		/*
		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
		 * We'll pass in our special bit indicating that the new behavior is expected
		 */

		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);

		vnode_put(src_vp);
		vnode_put(dst_vp);
		fp_drop(p, fd2, fp2, 0);
		break;
	}

4515
4516 /*
4517 * SPI for making a file compressed.
4518 */
4519 case F_MAKECOMPRESSED: {
4520 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4521
4522 if (fp->f_type != DTYPE_VNODE) {
4523 error = EBADF;
4524 goto out;
4525 }
4526
4527 vp = (struct vnode*)fp_get_data(fp);
4528 proc_fdunlock(p);
4529
4530 /* get the vnode */
4531 if (vnode_getwithref(vp)) {
4532 error = ENOENT;
4533 goto outdrop;
4534 }
4535
4536 /* Is it a file? */
4537 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
4538 vnode_put(vp);
4539 error = EBADF;
4540 goto outdrop;
4541 }
4542
4543 /* invoke ioctl to pass off to FS */
4544 /* Only go forward if you have write access */
4545 vfs_context_t ctx = vfs_context_current();
4546 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4547 vnode_put(vp);
4548 error = EBADF;
4549 goto outdrop;
4550 }
4551
4552 error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
4553
4554 vnode_put(vp);
4555 break;
4556 }
4557
4558 /*
4559 * SPI (private) for indicating to a filesystem that subsequent writes to
4560 * the open FD will written to the Fastflow.
4561 */
4562 case F_SET_GREEDY_MODE:
4563 /* intentionally drop through to the same handler as F_SETSTATIC.
4564 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4565 */
4566
4567 /*
4568 * SPI (private) for indicating to a filesystem that subsequent writes to
4569 * the open FD will represent static content.
4570 */
4571 case F_SETSTATICCONTENT: {
4572 caddr_t ioctl_arg = NULL;
4573
4574 if (uap->arg) {
4575 ioctl_arg = (caddr_t) 1;
4576 }
4577
4578 if (fp->f_type != DTYPE_VNODE) {
4579 error = EBADF;
4580 goto out;
4581 }
4582 vp = (struct vnode *)fp_get_data(fp);
4583 proc_fdunlock(p);
4584
4585 error = vnode_getwithref(vp);
4586 if (error) {
4587 error = ENOENT;
4588 goto outdrop;
4589 }
4590
4591 /* Only go forward if you have write access */
4592 vfs_context_t ctx = vfs_context_current();
4593 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4594 vnode_put(vp);
4595 error = EBADF;
4596 goto outdrop;
4597 }
4598
4599 error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
4600 (void)vnode_put(vp);
4601
4602 break;
4603 }
4604
4605 /*
4606 * SPI (private) for indicating to the lower level storage driver that the
4607 * subsequent writes should be of a particular IO type (burst, greedy, static),
4608 * or other flavors that may be necessary.
4609 */
4610 case F_SETIOTYPE: {
4611 caddr_t param_ptr;
4612 uint32_t param;
4613
4614 if (uap->arg) {
4615 /* extract 32 bits of flags from userland */
4616 param_ptr = (caddr_t) uap->arg;
4617 param = (uint32_t) param_ptr;
4618 } else {
4619 /* If no argument is specified, error out */
4620 error = EINVAL;
4621 goto out;
4622 }
4623
4624 /*
4625 * Validate the different types of flags that can be specified:
4626 * all of them are mutually exclusive for now.
4627 */
4628 switch (param) {
4629 case F_IOTYPE_ISOCHRONOUS:
4630 break;
4631
4632 default:
4633 error = EINVAL;
4634 goto out;
4635 }
4636
4637
4638 if (fp->f_type != DTYPE_VNODE) {
4639 error = EBADF;
4640 goto out;
4641 }
4642 vp = (struct vnode *)fp_get_data(fp);
4643 proc_fdunlock(p);
4644
4645 error = vnode_getwithref(vp);
4646 if (error) {
4647 error = ENOENT;
4648 goto outdrop;
4649 }
4650
4651 /* Only go forward if you have write access */
4652 vfs_context_t ctx = vfs_context_current();
4653 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4654 vnode_put(vp);
4655 error = EBADF;
4656 goto outdrop;
4657 }
4658
4659 error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
4660 (void)vnode_put(vp);
4661
4662 break;
4663 }
4664
4665 /*
4666 * Set the vnode pointed to by 'fd'
4667 * and tag it as the (potentially future) backing store
4668 * for another filesystem
4669 */
4670 case F_SETBACKINGSTORE: {
4671 if (fp->f_type != DTYPE_VNODE) {
4672 error = EBADF;
4673 goto out;
4674 }
4675
4676 vp = (struct vnode *)fp_get_data(fp);
4677
4678 if (vp->v_tag != VT_HFS) {
4679 error = EINVAL;
4680 goto out;
4681 }
4682 proc_fdunlock(p);
4683
4684 if (vnode_getwithref(vp)) {
4685 error = ENOENT;
4686 goto outdrop;
4687 }
4688
4689 /* only proceed if you have write access */
4690 vfs_context_t ctx = vfs_context_current();
4691 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4692 vnode_put(vp);
4693 error = EBADF;
4694 goto outdrop;
4695 }
4696
4697
4698 /* If arg != 0, set, otherwise unset */
4699 if (uap->arg) {
4700 error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
4701 } else {
4702 error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
4703 }
4704
4705 vnode_put(vp);
4706 break;
4707 }
4708
4709 /*
4710 * like F_GETPATH, but special semantics for
4711 * the mobile time machine handler.
4712 */
4713 case F_GETPATH_MTMINFO: {
4714 char *pathbufp;
4715 int pathlen;
4716
4717 if (fp->f_type != DTYPE_VNODE) {
4718 error = EBADF;
4719 goto out;
4720 }
4721 vp = (struct vnode *)fp_get_data(fp);
4722 proc_fdunlock(p);
4723
4724 pathlen = MAXPATHLEN;
4725 pathbufp = zalloc(ZV_NAMEI);
4726
4727 if ((error = vnode_getwithref(vp)) == 0) {
4728 int backingstore = 0;
4729
4730 /* Check for error from vn_getpath before moving on */
4731 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4732 if (vp->v_tag == VT_HFS) {
4733 error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4734 }
4735 (void)vnode_put(vp);
4736
4737 if (error == 0) {
4738 error = copyout((caddr_t)pathbufp, argp, pathlen);
4739 }
4740 if (error == 0) {
4741 /*
4742 * If the copyout was successful, now check to ensure
4743 * that this vnode is not a BACKINGSTORE vnode. mtmd
4744 * wants the path regardless.
4745 */
4746 if (backingstore) {
4747 error = EBUSY;
4748 }
4749 }
4750 } else {
4751 (void)vnode_put(vp);
4752 }
4753 }
4754
4755 zfree(ZV_NAMEI, pathbufp);
4756 goto outdrop;
4757 }
4758
4759 case F_RECYCLE: {
4760 #if !DEBUG && !DEVELOPMENT
4761 bool allowed = false;
4762
4763 //
4764 // non-debug and non-development kernels have restrictions
4765 // on who can all this fcntl. the process has to be marked
4766 // with the dataless-manipulator entitlement and either the
4767 // process or thread have to be marked rapid-aging.
4768 //
4769 if (!vfs_context_is_dataless_manipulator(&context)) {
4770 error = EPERM;
4771 goto out;
4772 }
4773
4774 proc_t proc = vfs_context_proc(&context);
4775 if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4776 allowed = true;
4777 } else {
4778 thread_t thr = vfs_context_thread(&context);
4779 if (thr) {
4780 struct uthread *ut = get_bsdthread_info(thr);
4781
4782 if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4783 allowed = true;
4784 }
4785 }
4786 }
4787 if (!allowed) {
4788 error = EPERM;
4789 goto out;
4790 }
4791 #endif
4792
4793 if (fp->f_type != DTYPE_VNODE) {
4794 error = EBADF;
4795 goto out;
4796 }
4797 vp = (struct vnode *)fp_get_data(fp);
4798 proc_fdunlock(p);
4799
4800 vnode_recycle(vp);
4801 break;
4802 }
4803
4804 default:
4805 /*
4806 * This is an fcntl() that we d not recognize at this level;
4807 * if this is a vnode, we send it down into the VNOP_IOCTL
4808 * for this vnode; this can include special devices, and will
4809 * effectively overload fcntl() to send ioctl()'s.
4810 */
4811 if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
4812 error = EINVAL;
4813 goto out;
4814 }
4815
4816 /* Catch any now-invalid fcntl() selectors */
4817 switch (cmd) {
4818 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
4819 case (int)FSIOC_FIOSEEKHOLE:
4820 case (int)FSIOC_FIOSEEKDATA:
4821 case (int)FSIOC_CAS_BSDFLAGS:
4822 case HFS_GET_BOOT_INFO:
4823 case HFS_SET_BOOT_INFO:
4824 case FIOPINSWAP:
4825 case F_MARKDEPENDENCY:
4826 case TIOCREVOKE:
4827 case TIOCREVOKECLEAR:
4828 error = EINVAL;
4829 goto out;
4830 default:
4831 break;
4832 }
4833
4834 if (fp->f_type != DTYPE_VNODE) {
4835 error = EBADF;
4836 goto out;
4837 }
4838 vp = (struct vnode *)fp_get_data(fp);
4839 proc_fdunlock(p);
4840
4841 if ((error = vnode_getwithref(vp)) == 0) {
4842 #define STK_PARAMS 128
4843 char stkbuf[STK_PARAMS] = {0};
4844 unsigned int size;
4845 caddr_t data, memp;
4846 /*
4847 * For this to work properly, we have to copy in the
4848 * ioctl() cmd argument if there is one; we must also
4849 * check that a command parameter, if present, does
4850 * not exceed the maximum command length dictated by
4851 * the number of bits we have available in the command
4852 * to represent a structure length. Finally, we have
4853 * to copy the results back out, if it is that type of
4854 * ioctl().
4855 */
4856 size = IOCPARM_LEN(cmd);
4857 if (size > IOCPARM_MAX) {
4858 (void)vnode_put(vp);
4859 error = EINVAL;
4860 break;
4861 }
4862
4863 memp = NULL;
4864 if (size > sizeof(stkbuf)) {
4865 memp = (caddr_t)kalloc_data(size, Z_WAITOK);
4866 if (memp == 0) {
4867 (void)vnode_put(vp);
4868 error = ENOMEM;
4869 goto outdrop;
4870 }
4871 data = memp;
4872 } else {
4873 data = &stkbuf[0];
4874 }
4875
4876 if (cmd & IOC_IN) {
4877 if (size) {
4878 /* structure */
4879 error = copyin(argp, data, size);
4880 if (error) {
4881 (void)vnode_put(vp);
4882 if (memp) {
4883 kfree_data(memp, size);
4884 }
4885 goto outdrop;
4886 }
4887
4888 /* Bzero the section beyond that which was needed */
4889 if (size <= sizeof(stkbuf)) {
4890 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
4891 }
4892 } else {
4893 /* int */
4894 if (is64bit) {
4895 *(user_addr_t *)data = argp;
4896 } else {
4897 *(uint32_t *)data = (uint32_t)argp;
4898 }
4899 };
4900 } else if ((cmd & IOC_OUT) && size) {
4901 /*
4902 * Zero the buffer so the user always
4903 * gets back something deterministic.
4904 */
4905 bzero(data, size);
4906 } else if (cmd & IOC_VOID) {
4907 if (is64bit) {
4908 *(user_addr_t *)data = argp;
4909 } else {
4910 *(uint32_t *)data = (uint32_t)argp;
4911 }
4912 }
4913
4914 error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
4915
4916 (void)vnode_put(vp);
4917
4918 /* Copy any output data to user */
4919 if (error == 0 && (cmd & IOC_OUT) && size) {
4920 error = copyout(data, argp, size);
4921 }
4922 if (memp) {
4923 kfree_data(memp, size);
4924 }
4925 }
4926 break;
4927 }
4928
4929 outdrop:
4930 return sys_fcntl_outdrop(p, fd, fp, vp, error);
4931
4932 out:
4933 return sys_fcntl_out(p, fd, fp, error);
4934 }
4935
4936
4937 /*
4938 * sys_close
4939 *
4940 * Description: The implementation of the close(2) system call
4941 *
4942 * Parameters: p Process in whose per process file table
4943 * the close is to occur
4944 * uap->fd fd to be closed
4945 * retval <unused>
4946 *
4947 * Returns: 0 Success
4948 * fp_lookup:EBADF Bad file descriptor
4949 * fp_guard_exception:??? Guarded file descriptor
4950 * close_internal:EBADF
4951 * close_internal:??? Anything returnable by a per-fileops
4952 * close function
4953 */
4954 int
sys_close(proc_t p,struct close_args * uap,__unused int32_t * retval)4955 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
4956 {
4957 __pthread_testcancel(1);
4958 return close_nocancel(p, uap->fd);
4959 }
4960
4961 int
sys_close_nocancel(proc_t p,struct close_nocancel_args * uap,__unused int32_t * retval)4962 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
4963 {
4964 return close_nocancel(p, uap->fd);
4965 }
4966
/*
 * close_nocancel
 *
 * Common close path shared by the cancellable and non-cancellable
 * close(2) entry points.
 */
int
close_nocancel(proc_t p, int fd)
{
	struct fileproc *fp;

	AUDIT_SYSCLOSE(p, fd);

	proc_fdlock(p);
	/* resolve the fd under the fdlock, without taking an extra reference */
	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
		proc_fdunlock(p);
		return EBADF;
	}

	/* closing a close-guarded fd raises a guard exception instead */
	if (fp_isguarded(fp, GUARD_CLOSE)) {
		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
		proc_fdunlock(p);
		return error;
	}

	/* NOTE: fp_close_and_unlock consumes the fdlock on all paths */
	return fp_close_and_unlock(p, fd, fp, 0);
}
4988
4989
4990 /*
4991 * fstat
4992 *
4993 * Description: Return status information about a file descriptor.
4994 *
4995 * Parameters: p The process doing the fstat
4996 * fd The fd to stat
4997 * ub The user stat buffer
4998 * xsecurity The user extended security
4999 * buffer, or 0 if none
5000 * xsecurity_size The size of xsecurity, or 0
5001 * if no xsecurity
5002 * isstat64 Flag to indicate 64 bit version
5003 * for inode size, etc.
5004 *
5005 * Returns: 0 Success
5006 * EBADF
5007 * EFAULT
5008 * fp_lookup:EBADF Bad file descriptor
5009 * vnode_getwithref:???
5010 * copyout:EFAULT
5011 * vnode_getwithref:???
5012 * vn_stat:???
5013 * soo_stat:???
5014 * pipe_stat:???
5015 * pshm_stat:???
5016 * kqueue_stat:???
5017 *
5018 * Notes: Internal implementation for all other fstat() related
5019 * functions
5020 *
5021 * XXX switch on node type is bogus; need a stat in struct
5022 * XXX fileops instead.
5023 */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
    user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* kernel-native stat buffer; which member is live depends on isstat64 */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* user-ABI stat buffer; selected by isstat64 and the process bitness */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	/* take a reference on the fileproc so it cannot go away under us */
	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = (caddr_t)fp_get_data(fp);
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/* dispatch on the descriptor type to fill in the kernel stat buffer */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
				    fp->fp_glob->fg_cred);
			} else {
				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put((vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		if (isstat64 != 0) {
			/* scrub spare fields so no kernel stack bytes leak to user space */
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			/* munge into the layout matching the caller's ABI */
			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat(&source.sb, &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(&source.sb, &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			/* none available: report a zero size back to the caller */
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/* if the caller supplied enough room, copy out to it */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	fp_drop(p, fd, fp, 0);
	/* fsec, if vn_stat allocated one, is owned by us and must be freed */
	if (fsec != NULL) {
		kauth_filesec_free(fsec);
	}
	return error;
}
5167
5168
5169 /*
5170 * sys_fstat_extended
5171 *
5172 * Description: Extended version of fstat supporting returning extended
5173 * security information
5174 *
5175 * Parameters: p The process doing the fstat
5176 * uap->fd The fd to stat
5177 * uap->ub The user stat buffer
5178 * uap->xsecurity The user extended security
5179 * buffer, or 0 if none
5180 * uap->xsecurity_size The size of xsecurity, or 0
5181 *
5182 * Returns: 0 Success
5183 * !0 Errno (see fstat)
5184 */
5185 int
sys_fstat_extended(proc_t p,struct fstat_extended_args * uap,__unused int32_t * retval)5186 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
5187 {
5188 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
5189 }
5190
5191
5192 /*
5193 * sys_fstat
5194 *
5195 * Description: Get file status for the file associated with fd
5196 *
5197 * Parameters: p The process doing the fstat
5198 * uap->fd The fd to stat
5199 * uap->ub The user stat buffer
5200 *
5201 * Returns: 0 Success
5202 * !0 Errno (see fstat)
5203 */
5204 int
sys_fstat(proc_t p,struct fstat_args * uap,__unused int32_t * retval)5205 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
5206 {
5207 return fstat(p, uap->fd, uap->ub, 0, 0, 0);
5208 }
5209
5210
5211 /*
5212 * sys_fstat64_extended
5213 *
5214 * Description: Extended version of fstat64 supporting returning extended
5215 * security information
5216 *
5217 * Parameters: p The process doing the fstat
5218 * uap->fd The fd to stat
5219 * uap->ub The user stat buffer
5220 * uap->xsecurity The user extended security
5221 * buffer, or 0 if none
5222 * uap->xsecurity_size The size of xsecurity, or 0
5223 *
5224 * Returns: 0 Success
5225 * !0 Errno (see fstat)
5226 */
5227 int
sys_fstat64_extended(proc_t p,struct fstat64_extended_args * uap,__unused int32_t * retval)5228 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
5229 {
5230 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
5231 }
5232
5233
5234 /*
5235 * sys_fstat64
5236 *
5237 * Description: Get 64 bit version of the file status for the file associated
5238 * with fd
5239 *
5240 * Parameters: p The process doing the fstat
5241 * uap->fd The fd to stat
5242 * uap->ub The user stat buffer
5243 *
5244 * Returns: 0 Success
5245 * !0 Errno (see fstat)
5246 */
5247 int
sys_fstat64(proc_t p,struct fstat64_args * uap,__unused int32_t * retval)5248 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
5249 {
5250 return fstat(p, uap->fd, uap->ub, 0, 0, 1);
5251 }
5252
5253
5254 /*
5255 * sys_fpathconf
5256 *
5257 * Description: Return pathconf information about a file descriptor.
5258 *
5259 * Parameters: p Process making the request
5260 * uap->fd fd to get information about
5261 * uap->name Name of information desired
5262 * retval Pointer to the call return area
5263 *
5264 * Returns: 0 Success
5265 * EINVAL
5266 * fp_lookup:EBADF Bad file descriptor
5267 * vnode_getwithref:???
5268 * vn_pathconf:???
5269 *
5270 * Implicit returns:
5271 * *retval (modified) Returned information (numeric)
5272 */
5273 int
sys_fpathconf(proc_t p,struct fpathconf_args * uap,int32_t * retval)5274 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5275 {
5276 int fd = uap->fd;
5277 struct fileproc *fp;
5278 struct vnode *vp;
5279 int error = 0;
5280 file_type_t type;
5281
5282
5283 AUDIT_ARG(fd, uap->fd);
5284 if ((error = fp_lookup(p, fd, &fp, 0))) {
5285 return error;
5286 }
5287 type = fp->f_type;
5288
5289 switch (type) {
5290 case DTYPE_SOCKET:
5291 if (uap->name != _PC_PIPE_BUF) {
5292 error = EINVAL;
5293 goto out;
5294 }
5295 *retval = PIPE_BUF;
5296 error = 0;
5297 goto out;
5298
5299 case DTYPE_PIPE:
5300 if (uap->name != _PC_PIPE_BUF) {
5301 error = EINVAL;
5302 goto out;
5303 }
5304 *retval = PIPE_BUF;
5305 error = 0;
5306 goto out;
5307
5308 case DTYPE_VNODE:
5309 vp = (struct vnode *)fp_get_data(fp);
5310
5311 if ((error = vnode_getwithref(vp)) == 0) {
5312 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5313
5314 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5315
5316 (void)vnode_put(vp);
5317 }
5318 goto out;
5319
5320 default:
5321 error = EINVAL;
5322 goto out;
5323 }
5324 /*NOTREACHED*/
5325 out:
5326 fp_drop(p, fd, fp, 0);
5327 return error;
5328 }
5329
5330 /*
5331 * sys_flock
5332 *
5333 * Description: Apply an advisory lock on a file descriptor.
5334 *
5335 * Parameters: p Process making request
5336 * uap->fd fd on which the lock is to be
5337 * attempted
5338 * uap->how (Un)Lock bits, including type
5339 * retval Pointer to the call return area
5340 *
5341 * Returns: 0 Success
5342 * fp_getfvp:EBADF Bad file descriptor
5343 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5344 * vnode_getwithref:???
5345 * VNOP_ADVLOCK:???
5346 *
5347 * Implicit returns:
5348 * *retval (modified) Size of dtable
5349 *
5350 * Notes: Just attempt to get a record lock of the requested type on
5351 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5352 */
int
sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
{
	int fd = uap->fd;
	int how = uap->how;
	struct fileproc *fp;
	struct vnode *vp;
	struct flock lf;
	vfs_context_t ctx = vfs_context_current();
	int error = 0;

	AUDIT_ARG(fd, uap->fd);
	/* resolve the fd to a vnode; fails for non-vnode descriptors */
	if ((error = fp_getfvp(p, fd, &fp, &vp))) {
		return error;
	}
	if ((error = vnode_getwithref(vp))) {
		goto out1;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* whole-file lock: from offset 0 with length 0 (i.e. to EOF) */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (how & LOCK_UN) {
		/* unlock request: release and return without the MACF check */
		lf.l_type = F_UNLCK;
		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
		goto out;
	}
	if (how & LOCK_EX) {
		lf.l_type = F_WRLCK;
	} else if (how & LOCK_SH) {
		lf.l_type = F_RDLCK;
	} else {
		/* neither exclusive nor shared requested */
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF
	error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
	if (error) {
		goto out;
	}
#endif
	/* LOCK_NB means fail rather than sleep on a conflicting lock */
	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
	    ctx, NULL);
	if (!error) {
		/* mark the fileglob as having held a lock (released on close) */
		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
	}
out:
	(void)vnode_put(vp);
out1:
	fp_drop(p, fd, fp, 0);
	return error;
}
5407
5408 /*
5409 * sys_fileport_makeport
5410 *
5411 * Description: Obtain a Mach send right for a given file descriptor.
5412 *
5413 * Parameters: p Process calling fileport
5414 * uap->fd The fd to reference
5415 * uap->portnamep User address at which to place port name.
5416 *
5417 * Returns: 0 Success.
5418 * EBADF Bad file descriptor.
5419 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5420 * EFAULT Address at which to store port name is not valid.
5421 * EAGAIN Resource shortage.
5422 *
5423 * Implicit returns:
5424 * On success, name of send right is stored at user-specified address.
5425 */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	proc_fdlock(p);
	/* look up the fd with the fdlock already held (locked variant) */
	err = fp_lookup(p, fd, &fp, 1);
	if (err != 0) {
		goto out_unlock;
	}

	fg = fp->fp_glob;
	/* reject descriptor types that fg_sendable says cannot be sent */
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	/* fileport-guarded descriptors raise a guard exception instead */
	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		/* release the reference we took above for the port */
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry. Deallocates port on failure. */
	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(&fg->fg_lock);

	fp_drop(p, fd, fp, 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	/* error path: undo the port-name insertion if it had succeeded */
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, 0);
	}

	return err;
}
5503
5504 void
fileport_releasefg(struct fileglob * fg)5505 fileport_releasefg(struct fileglob *fg)
5506 {
5507 (void)fg_drop(PROC_NULL, fg);
5508 }
5509
5510 /*
5511 * fileport_makefd
5512 *
5513 * Description: Obtain the file descriptor for a given Mach send right.
5514 *
5515 * Returns: 0 Success
5516 * EINVAL Invalid Mach port name, or port is not for a file.
5517 * fdalloc:EMFILE
5518 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5519 *
5520 * Implicit returns:
5521 * *retval (modified) The new descriptor
5522 */
int
fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
{
	struct fileglob *fg;
	struct fileproc *fp = FILEPROC_NULL;
	int fd;
	int err;

	/* the port must actually be a fileport wrapping a fileglob */
	fg = fileport_port_to_fileglob(port);
	if (fg == NULL) {
		err = EINVAL;
		goto out;
	}

	fp = fileproc_alloc_init();

	proc_fdlock(p);
	/* reserve a descriptor slot in the process's table */
	err = fdalloc(p, 0, &fd);
	if (err != 0) {
		proc_fdunlock(p);
		goto out;
	}
	if (fp_flags) {
		fp->fp_flags |= fp_flags;
	}

	/* bind the new fileproc to the port's fileglob, taking a reference */
	fp->fp_glob = fg;
	fg_ref(p, fg);

	/* publish the fd and release the reserved slot */
	procfdtbl_releasefd(p, fd, fp);
	proc_fdunlock(p);

	*retval = fd;
	err = 0;
out:
	/* on failure, free the fileproc we allocated (if any) */
	if ((fp != NULL) && (0 != err)) {
		fileproc_free(fp);
	}

	return err;
}
5564
5565 /*
5566 * sys_fileport_makefd
5567 *
5568 * Description: Obtain the file descriptor for a given Mach send right.
5569 *
5570 * Parameters: p Process calling fileport
5571 * uap->port Name of send right to file port.
5572 *
5573 * Returns: 0 Success
5574 * EINVAL Invalid Mach port name, or port is not for a file.
5575 * fdalloc:EMFILE
5576 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5577 *
5578 * Implicit returns:
5579 * *retval (modified) The new descriptor
5580 */
5581 int
sys_fileport_makefd(proc_t p,struct fileport_makefd_args * uap,int32_t * retval)5582 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5583 {
5584 ipc_port_t port = IPC_PORT_NULL;
5585 mach_port_name_t send = uap->port;
5586 kern_return_t res;
5587 int err;
5588
5589 res = ipc_object_copyin(get_task_ipcspace(p->task),
5590 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5591
5592 if (res == KERN_SUCCESS) {
5593 err = fileport_makefd(p, port, FP_CLOEXEC, retval);
5594 } else {
5595 err = EINVAL;
5596 }
5597
5598 if (IPC_PORT_NULL != port) {
5599 ipc_port_release_send(port);
5600 }
5601
5602 return err;
5603 }
5604
5605
5606 #pragma mark fileops wrappers
5607
5608 /*
5609 * fo_read
5610 *
5611 * Description: Generic fileops read indirected through the fileops pointer
5612 * in the fileproc structure
5613 *
5614 * Parameters: fp fileproc structure pointer
5615 * uio user I/O structure pointer
5616 * flags FOF_ flags
5617 * ctx VFS context for operation
5618 *
5619 * Returns: 0 Success
5620 * !0 Errno from read
5621 */
5622 int
fo_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5623 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5624 {
5625 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5626 }
5627
5628 int
fo_no_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5629 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5630 {
5631 #pragma unused(fp, uio, flags, ctx)
5632 return ENXIO;
5633 }
5634
5635
5636 /*
5637 * fo_write
5638 *
5639 * Description: Generic fileops write indirected through the fileops pointer
5640 * in the fileproc structure
5641 *
5642 * Parameters: fp fileproc structure pointer
5643 * uio user I/O structure pointer
5644 * flags FOF_ flags
5645 * ctx VFS context for operation
5646 *
5647 * Returns: 0 Success
5648 * !0 Errno from write
5649 */
5650 int
fo_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5651 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5652 {
5653 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5654 }
5655
5656 int
fo_no_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5657 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5658 {
5659 #pragma unused(fp, uio, flags, ctx)
5660 return ENXIO;
5661 }
5662
5663
5664 /*
5665 * fo_ioctl
5666 *
5667 * Description: Generic fileops ioctl indirected through the fileops pointer
5668 * in the fileproc structure
5669 *
5670 * Parameters: fp fileproc structure pointer
5671 * com ioctl command
5672 * data pointer to internalized copy
5673 * of user space ioctl command
5674 * parameter data in kernel space
5675 * ctx VFS context for operation
5676 *
5677 * Returns: 0 Success
5678 * !0 Errno from ioctl
5679 *
5680 * Locks: The caller is assumed to have held the proc_fdlock; this
5681 * function releases and reacquires this lock. If the caller
5682 * accesses data protected by this lock prior to calling this
5683 * function, it will need to revalidate/reacquire any cached
5684 * protected data obtained prior to the call.
5685 */
5686 int
fo_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5687 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5688 {
5689 int error;
5690
5691 proc_fdunlock(vfs_context_proc(ctx));
5692 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5693 proc_fdlock(vfs_context_proc(ctx));
5694 return error;
5695 }
5696
5697 int
fo_no_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5698 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5699 {
5700 #pragma unused(fp, com, data, ctx)
5701 return ENOTTY;
5702 }
5703
5704
5705 /*
5706 * fo_select
5707 *
5708 * Description: Generic fileops select indirected through the fileops pointer
5709 * in the fileproc structure
5710 *
5711 * Parameters: fp fileproc structure pointer
5712 * which select which
5713 * wql pointer to wait queue list
5714 * ctx VFS context for operation
5715 *
5716 * Returns: 0 Success
5717 * !0 Errno from select
5718 */
5719 int
fo_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)5720 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5721 {
5722 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5723 }
5724
5725 int
fo_no_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)5726 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5727 {
5728 #pragma unused(fp, which, wql, ctx)
5729 return ENOTSUP;
5730 }
5731
5732
5733 /*
5734 * fo_close
5735 *
5736 * Description: Generic fileops close indirected through the fileops pointer
5737 * in the fileproc structure
5738 *
5739 * Parameters: fp fileproc structure pointer for
5740 * file to close
5741 * ctx VFS context for operation
5742 *
5743 * Returns: 0 Success
5744 * !0 Errno from close
5745 */
5746 int
fo_close(struct fileglob * fg,vfs_context_t ctx)5747 fo_close(struct fileglob *fg, vfs_context_t ctx)
5748 {
5749 return (*fg->fg_ops->fo_close)(fg, ctx);
5750 }
5751
5752
5753 /*
5754 * fo_drain
5755 *
5756 * Description: Generic fileops kqueue filter indirected through the fileops
5757 * pointer in the fileproc structure
5758 *
5759 * Parameters: fp fileproc structure pointer
5760 * ctx VFS context for operation
5761 *
5762 * Returns: 0 Success
5763 * !0 errno from drain
5764 */
5765 int
fo_drain(struct fileproc * fp,vfs_context_t ctx)5766 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5767 {
5768 return (*fp->f_ops->fo_drain)(fp, ctx);
5769 }
5770
5771 int
fo_no_drain(struct fileproc * fp,vfs_context_t ctx)5772 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5773 {
5774 #pragma unused(fp, ctx)
5775 return ENOTSUP;
5776 }
5777
5778
5779 /*
5780 * fo_kqfilter
5781 *
5782 * Description: Generic fileops kqueue filter indirected through the fileops
5783 * pointer in the fileproc structure
5784 *
5785 * Parameters: fp fileproc structure pointer
5786 * kn pointer to knote to filter on
5787 *
5788 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5789 * 0 Filter is not active
5790 * !0 Filter is active
5791 */
5792 int
fo_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)5793 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5794 {
5795 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5796 }
5797
5798 int
fo_no_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)5799 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5800 {
5801 #pragma unused(fp, kev)
5802 knote_set_error(kn, ENOTSUP);
5803 return 0;
5804 }
5805