1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129
130 #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
131 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
132 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
133 void ipc_port_release_send(ipc_port_t);
134
135 void fileport_releasefg(struct fileglob *fg);
136
137 /* flags for fp_close_and_unlock */
138 #define FD_DUP2RESV 1
139
140 /* We don't want these exported */
141
142 __private_extern__
143 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
144
145 /* Conflict wait queue for when selects collide (opaque type) */
146 extern struct waitq select_conflict_queue;
147
148 #define f_flag fp_glob->fg_flag
149 #define f_type fp_glob->fg_ops->fo_type
150 #define f_cred fp_glob->fg_cred
151 #define f_ops fp_glob->fg_ops
152 #define f_offset fp_glob->fg_offset
153
154 ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
155 ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);
156
157 /*
158 * If you need accounting for KM_OFILETABL consider using
159 * KALLOC_HEAP_DEFINE to define a view.
160 */
161 #define KM_OFILETABL KHEAP_DEFAULT
162
163 /*
164 * Descriptor management.
165 */
166 int nfiles; /* actual number of open files */
167 /*
168 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
169 */
170 static const struct fileops uninitops;
171
172 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
173 static LCK_GRP_DECLARE(file_lck_grp, "file");
174
175
176 #pragma mark fileglobs
177
178 /*!
179 * @function fg_free
180 *
181 * @brief
182 * Free a file structure.
183 */
184 static void
fg_free(struct fileglob * fg)185 fg_free(struct fileglob *fg)
186 {
187 os_atomic_dec(&nfiles, relaxed);
188
189 if (fg->fg_vn_data) {
190 fg_vn_data_free(fg->fg_vn_data);
191 fg->fg_vn_data = NULL;
192 }
193
194 kauth_cred_t cred = fg->fg_cred;
195 if (IS_VALID_CRED(cred)) {
196 kauth_cred_unref(&cred);
197 fg->fg_cred = NOCRED;
198 }
199 lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
200
201 #if CONFIG_MACF && CONFIG_VNGUARD
202 vng_file_label_destroy(fg);
203 #endif
204 zfree(fg_zone, fg);
205 }
206
/*
 * fg_ref
 *
 * Take an additional reference on a fileglob on behalf of process `p`.
 * Caller must hold the proc_fdlock (asserted on DEBUG/DEVELOPMENT
 * kernels; `p` is otherwise unused).
 */
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p;
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
218
/*
 * fg_drop_live
 *
 * Release a fileglob reference that is known not to be the last one
 * (os_ref_release_live_raw enforces that the count stays above zero).
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
224
/*
 * fg_drop
 *
 * Drop a reference on a fileglob; when the last reference goes away,
 * invoke the file's fo_close operation and free the structure.
 *
 * Before the reference is released, any POSIX advisory locks held by
 * process `p` on the underlying vnode are released (POSIX requires a
 * close to drop ALL of the process's record locks).
 *
 * Returns 0, or an error from vnode_getwithref()/fo_close().
 */
int
fg_drop(proc_t p, struct fileglob *fg)
{
	struct vnode *vp;
	struct vfs_context context;
	int error = 0;

	if (fg == NULL) {
		return 0;
	}

	/* Set up context with cred stashed in fg */
	if (p == current_proc()) {
		context.vc_thread = current_thread();
	} else {
		context.vc_thread = NULL;
	}
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
	    (p->p_ladvflag & P_LADVLOCK)) {
		struct flock lf = {
			.l_whence = SEEK_SET,
			.l_type = F_UNLCK,
		};

		vp = (struct vnode *)fg_get_data(fg);
		if ((error = vnode_getwithref(vp)) == 0) {
			/* best effort: errors from the unlock are ignored */
			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
			(void)vnode_put(vp);
		}
	}

	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
		/*
		 * Since we ensure that fg->fg_ops is always initialized,
		 * it is safe to invoke fo_close on the fg
		 */
		error = fo_close(fg, &context);

		fg_free(fg);
	}

	return error;
}
278
/*
 * fg_set_data
 *
 * Store the opaque per-type payload pointer into fg->fg_data.
 * With pointer authentication enabled, a non-NULL pointer is signed
 * with a discriminator blended from the storage address and the
 * fileglob's type, so a signed fg_data value cannot be replayed into
 * a different slot or a descriptor of a different type.
 */
inline
void
fg_set_data(
	struct fileglob *fg,
	void *fg_data)
{
	uintptr_t *store = &fg->fg_data;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	/* NULL is stored unsigned so it remains recognizable as NULL */
	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_sign_unauthenticated(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	*store = (uintptr_t)fg_data;
}
300
/*
 * fg_get_data_volatile
 *
 * Load the opaque payload pointer from fg->fg_data, authenticating it
 * (when pointer authentication is enabled) with the same blended
 * discriminator used by fg_set_data().  A forged or mismatched value
 * fails authentication rather than yielding a usable pointer.
 */
inline
void *
fg_get_data_volatile(struct fileglob *fg)
{
	uintptr_t *store = &fg->fg_data;
	void *fg_data = (void *)*store;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	/* NULL was stored unsigned, so only authenticate non-NULL values */
	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_auth_data(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	return fg_data;
}
321
322
323 bool
fg_sendable(struct fileglob * fg)324 fg_sendable(struct fileglob *fg)
325 {
326 switch (FILEGLOB_DTYPE(fg)) {
327 case DTYPE_VNODE:
328 case DTYPE_SOCKET:
329 case DTYPE_PIPE:
330 case DTYPE_PSXSHM:
331 case DTYPE_NETPOLICY:
332 return (fg->fg_lflags & FG_CONFINED) == 0;
333
334 default:
335 return false;
336 }
337 }
338
339 #pragma mark file descriptor table (static helpers)
340
/*
 * Mark slot `fd` reserved: empty the entry and set UF_RESERVED so
 * other allocators skip it until procfdtbl_releasefd() publishes it.
 * Called with the proc_fdlock held (see fdalloc()).
 */
static void
procfdtbl_reservefd(struct proc * p, int fd)
{
	p->p_fd.fd_ofiles[fd] = NULL;
	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
}
347
348 void
procfdtbl_releasefd(struct proc * p,int fd,struct fileproc * fp)349 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
350 {
351 if (fp != NULL) {
352 p->p_fd.fd_ofiles[fd] = fp;
353 }
354 p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
355 if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
356 p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
357 wakeup(&p->p_fd);
358 }
359 }
360
/*
 * Sleep until a reserved slot is released.  Sets UF_RESVWAIT so that
 * procfdtbl_releasefd()/procfdtbl_clearfd() know to issue a wakeup;
 * msleep() drops and retakes fd_lock around the sleep.
 */
static void
procfdtbl_waitfd(struct proc * p, int fd)
{
	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
}
367
368 static void
procfdtbl_clearfd(struct proc * p,int fd)369 procfdtbl_clearfd(struct proc * p, int fd)
370 {
371 int waiting;
372
373 waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
374 p->p_fd.fd_ofiles[fd] = NULL;
375 p->p_fd.fd_ofileflags[fd] = 0;
376 if (waiting == UF_RESVWAIT) {
377 wakeup(&p->p_fd);
378 }
379 }
380
381 /*
382 * fdrelse
383 *
384 * Description: Inline utility function to free an fd in a filedesc
385 *
 * Parameters:	p	Process whose descriptor table fd lies in
 *		fd	fd to free
389 *
390 * Returns: void
391 *
392 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
393 * the caller
394 */
void
fdrelse(struct proc * p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	int nfd = 0;

	/* remember the lowest free slot */
	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}
#if DIAGNOSTIC
	if (fd >= fdp->fd_afterlast) {
		panic("fdrelse: fd_afterlast inconsistent");
	}
#endif
	/* empty the slot and wake anyone waiting on its reservation */
	procfdtbl_clearfd(p, fd);

	/* shrink fd_afterlast past any trailing free, unreserved slots */
	nfd = fdp->fd_afterlast;
	while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
	    !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
		nfd--;
	}
	fdp->fd_afterlast = nfd;

#if CONFIG_PROC_RESOURCE_LIMITS
	fdp->fd_nfiles_open--;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
422
423
424 /*
425 * finishdup
426 *
427 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
428 *
429 * Parameters: p Process performing the dup
430 * old The fd to dup
431 * new The fd to dup it to
432 * fp_flags Flags to augment the new fp
433 * retval Pointer to the call return area
434 *
435 * Returns: 0 Success
436 * EBADF
437 * ENOMEM
438 *
439 * Implicit returns:
440 * *retval (modified) The new descriptor
441 *
442 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
443 * the caller
444 *
445 * Notes: This function may drop and reacquire this lock; it is unsafe
446 * for a caller to assume that other state protected by the lock
447 * has not been subsequently changed out from under it.
448 */
static int
finishdup(proc_t p, struct filedesc *fdp, int old, int new,
    fileproc_flags_t fp_flags, int32_t *retval)
{
	struct fileproc *nfp;
	struct fileproc *ofp;
#if CONFIG_MACF
	int error;
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	/* the source descriptor must be open and not in flux */
	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
		fdrelse(p, new);
		return EBADF;
	}

#if CONFIG_MACF
	/* give MAC policies a chance to veto the dup */
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_dup(cred, ofp->fp_glob, new);
	kauth_cred_unref(&cred);

	if (error) {
		fdrelse(p, new);
		return error;
	}
#endif

	/* both descriptors will share the same fileglob */
	fg_ref(p, ofp->fp_glob);

	/*
	 * Drop the fdlock across the (possibly blocking) fileproc
	 * allocation; slot `new` remains reserved in the meantime.
	 */
	proc_fdunlock(p);

	nfp = fileproc_alloc_init();

	if (fp_flags) {
		nfp->fp_flags |= fp_flags;
	}
	nfp->fp_glob = ofp->fp_glob;

	proc_fdlock(p);

#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("finishdup: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("finishdup: unreserved fileflags with new %d", new);
	}
#endif

	if (new >= fdp->fd_afterlast) {
		fdp->fd_afterlast = new + 1;
	}
	/* publish the new descriptor and wake any reservation waiters */
	procfdtbl_releasefd(p, new, nfp);
	*retval = new;
	return 0;
}
509
510
511 #pragma mark file descriptor table (exported functions)
512
/*
 * proc_dirs_lock_shared / proc_dirs_unlock_shared
 * proc_dirs_lock_exclusive / proc_dirs_unlock_exclusive
 *
 * Shared/exclusive entry points for the per-process fd_dirs_lock
 * rw-lock.  NOTE(review): presumably serializes access to the
 * process' directory vnodes (fd_cdir/fd_rdir) — confirm with callers.
 */
void
proc_dirs_lock_shared(proc_t p)
{
	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_unlock_shared(proc_t p)
{
	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_lock_exclusive(proc_t p)
{
	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_unlock_exclusive(proc_t p)
{
	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
}
536
537 /*
538 * proc_fdlock, proc_fdlock_spin
539 *
540 * Description: Lock to control access to the per process struct fileproc
541 * and struct filedesc
542 *
543 * Parameters: p Process to take the lock on
544 *
545 * Returns: void
546 *
547 * Notes: The lock is initialized in forkproc() and destroyed in
548 * reap_child_process().
549 */
void
proc_fdlock(proc_t p)
{
	lck_mtx_lock(&p->p_fd.fd_lock);
}

/* spin-mode acquisition of the same fd_lock */
void
proc_fdlock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_fd.fd_lock);
}

/* assert the ownership state of fd_lock (LCK_MTX_ASSERT_* flags) */
void
proc_fdlock_assert(proc_t p, int assertflags)
{
	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
}
567
568
569 /*
570 * proc_fdunlock
571 *
572 * Description: Unlock the lock previously locked by a call to proc_fdlock()
573 *
574 * Parameters: p Process to drop the lock on
575 *
576 * Returns: void
577 */
void
proc_fdunlock(proc_t p)
{
	/* release the per-process file descriptor table lock */
	lck_mtx_unlock(&p->p_fd.fd_lock);
}
583
/*
 * fdt_available_locked
 *
 * Answer whether `n` more file descriptors can be allocated for `p`
 * without exceeding its open-file limit.  Caller holds the
 * proc_fdlock (as the _locked suffix indicates).
 */
bool
fdt_available_locked(proc_t p, int n)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **fpp;
	char *flags;
	int i;
	int lim = proc_limitgetcur_nofile(p);

	/* room the table can still grow by already satisfies n? */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
		return true;
	}
	/* otherwise count free, unreserved slots in the existing table */
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
			return true;
		}
	}
	return false;
}
605
606
607 struct fdt_iterator
fdt_next(proc_t p,int fd,bool only_settled)608 fdt_next(proc_t p, int fd, bool only_settled)
609 {
610 struct fdt_iterator it;
611 struct filedesc *fdp = &p->p_fd;
612 struct fileproc *fp;
613 int nfds = fdp->fd_afterlast;
614
615 while (++fd < nfds) {
616 fp = fdp->fd_ofiles[fd];
617 if (fp == NULL || fp->fp_glob == NULL) {
618 continue;
619 }
620 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
621 continue;
622 }
623 it.fdti_fd = fd;
624 it.fdti_fp = fp;
625 return it;
626 }
627
628 it.fdti_fd = nfds;
629 it.fdti_fp = NULL;
630 return it;
631 }
632
633 struct fdt_iterator
fdt_prev(proc_t p,int fd,bool only_settled)634 fdt_prev(proc_t p, int fd, bool only_settled)
635 {
636 struct fdt_iterator it;
637 struct filedesc *fdp = &p->p_fd;
638 struct fileproc *fp;
639
640 while (--fd >= 0) {
641 fp = fdp->fd_ofiles[fd];
642 if (fp == NULL || fp->fp_glob == NULL) {
643 continue;
644 }
645 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
646 continue;
647 }
648 it.fdti_fd = fd;
649 it.fdti_fp = fp;
650 return it;
651 }
652
653 it.fdti_fd = -1;
654 it.fdti_fp = NULL;
655 return it;
656 }
657
/*
 * fdt_init
 *
 * Initialize the locks embedded in a process' filedesc: the kqueue
 * hash locks, the fd-table mutex, and the directories rw-lock.
 */
void
fdt_init(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
}
668
/*
 * fdt_destroy
 *
 * Destroy the locks initialized by fdt_init(), in the same order.
 */
void
fdt_destroy(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
}
679
/*
 * fdt_exec
 *
 * Description: File-table work for exec: unbind any workq/workloop
 *		servicing thread, tear down all knotes, close descriptors
 *		that must not survive the exec, and release the
 *		per-process workq kqueue.
 *
 * Parameters:	p			the exec'ing process
 *		posix_spawn_flags	POSIX_SPAWN_CLOEXEC_DEFAULT
 *					reverses inheritance semantics
 */
void
fdt_exec(proc_t p, short posix_spawn_flags)
{
	struct filedesc *fdp = &p->p_fd;
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);
	struct kqworkq *dealloc_kqwq = NULL;

	/*
	 * If the current thread is bound as a workq/workloop
	 * servicing thread, we need to unbind it first.
	 */
	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
	}

	/*
	 * Deallocate the knotes for this process
	 * and mark the tables non-existent so
	 * subsequent kqueue closes go faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	proc_fdlock(p);

	for (int i = fdp->fd_afterlast; i-- > 0;) {
		struct fileproc *fp = fdp->fd_ofiles[i];
		char *flagp = &fdp->fd_ofileflags[i];
		bool inherit_file = true;

		if (fp == FILEPROC_NULL) {
			continue;
		}

		/*
		 * no file descriptor should be in flux when in exec,
		 * because we stopped all other threads
		 */
		if (*flagp & ~UF_INHERIT) {
			panic("file %d/%p in flux during exec of %p", i, fp, p);
		}

		if (fp->fp_flags & FP_CLOEXEC) {
			inherit_file = false;
		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
		    !(*flagp & UF_INHERIT)) {
			/*
			 * Reverse the usual semantics of file descriptor
			 * inheritance - all of them should be closed
			 * except files marked explicitly as "inherit" and
			 * not marked close-on-exec.
			 */
			inherit_file = false;
#if CONFIG_MACF
		} else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
			/* MAC policy vetoed inheritance of this descriptor */
			inherit_file = false;
#endif
		}

		*flagp = 0; /* clear UF_INHERIT */

		if (!inherit_file) {
			/* fp_close_and_unlock() drops the fdlock; retake it */
			fp_close_and_unlock(p, i, fp, 0);
			proc_fdlock(p);
		}
	}

	/* release the per-process workq kq */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* Anything to free?  Deallocate outside the fdlock. */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
}
762
763
/*
 * fdt_fork
 *
 * Description: Clone the parent's descriptor table for fork(): take
 *		new references on the current/root directories, then
 *		duplicate every inheritable descriptor into a fresh
 *		fileproc sharing the parent's fileglob.
 *
 * Parameters:	newfdp		child filedesc being populated
 *		p		parent process
 *		uth_cdir	per-thread current directory to inherit
 *				in place of fd_cdir when non-NULL
 *
 * Returns:	0		Success
 *		EPERM		couldn't re-reference the chroot directory
 *		ENOMEM		couldn't allocate the descriptor arrays
 *
 * Notes:	Descriptors that are confined (FG_CONFINED), marked
 *		close-on-fork (FP_CLOFORK), or in flux (UF_RESERVED)
 *		are not copied into the child.
 */
int
fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **ofiles;
	char *ofileflags;
	int n_files, afterlast, freefile;
	vnode_t v_dir;
#if CONFIG_PROC_RESOURCE_LIMITS
	int fd_nfiles_open = 0;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	proc_fdlock(p);

	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
	newfdp->fd_cmask = fdp->fd_cmask;
#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	/*
	 * For both fd_cdir and fd_rdir make sure we get
	 * a valid reference... if we can't, than set
	 * set the pointer(s) to NULL in the child... this
	 * will keep us from using a non-referenced vp
	 * and allows us to do the vnode_rele only on
	 * a properly referenced vp
	 */
	if ((v_dir = fdp->fd_rdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_rdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_rdir == NULL) {
			/*
			 * We couldn't get a new reference on
			 * the chroot directory being
			 * inherited... this is fatal, since
			 * otherwise it would constitute an
			 * escape from a chroot environment by
			 * the new process.
			 */
			proc_fdunlock(p);
			return EPERM;
		}
	}

	/*
	 * If we are running with per-thread current working directories,
	 * inherit the new current working directory from the current thread.
	 */
	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_cdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
			/*
			 * we couldn't get a new reference on
			 * the current working directory being
			 * inherited... we might as well drop
			 * our reference from the parent also
			 * since the vnode has gone DEAD making
			 * it useless... by dropping it we'll
			 * be that much closer to recycling it
			 */
			vnode_rele(fdp->fd_cdir);
			fdp->fd_cdir = NULL;
		}
	}

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	afterlast = fdp->fd_afterlast;
	freefile = fdp->fd_freefile;
	if (afterlast <= NDFILE) {
		n_files = NDFILE;
	} else {
		n_files = roundup(afterlast, NDEXTENT);
	}

	/* drop the lock across the (possibly blocking) table allocation */
	proc_fdunlock(p);

	ofiles = kheap_alloc(KM_OFILETABL, n_files * OFILESIZE,
	    Z_WAITOK | Z_ZERO);
	if (ofiles == NULL) {
		/* undo the directory references taken above */
		if (newfdp->fd_cdir) {
			vnode_rele(newfdp->fd_cdir);
			newfdp->fd_cdir = NULL;
		}
		if (newfdp->fd_rdir) {
			vnode_rele(newfdp->fd_rdir);
			newfdp->fd_rdir = NULL;
		}
		return ENOMEM;
	}
	/* flags array lives immediately after the pointer array */
	ofileflags = (char *)&ofiles[n_files];

	proc_fdlock(p);

	for (int i = afterlast; i-- > 0;) {
		struct fileproc *ofp, *nfp;
		char flags;

		ofp = fdp->fd_ofiles[i];
		flags = fdp->fd_ofileflags[i];

		if (ofp == NULL ||
		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
		    (ofp->fp_flags & FP_CLOFORK) ||
		    (flags & UF_RESERVED)) {
			/* not inherited: keep afterlast/freefile accurate */
			if (i + 1 == afterlast) {
				afterlast = i;
			}
			if (i < freefile) {
				freefile = i;
			}

			continue;
		}

		assert(ofp->fp_guard_attrs == 0);
		nfp = fileproc_alloc_init();
		nfp->fp_glob = ofp->fp_glob;
		nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
		fg_ref(p, nfp->fp_glob);

		ofiles[i] = nfp;
#if CONFIG_PROC_RESOURCE_LIMITS
		fd_nfiles_open++;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	}

	proc_fdunlock(p);

	newfdp->fd_ofiles = ofiles;
	newfdp->fd_ofileflags = ofileflags;
	newfdp->fd_nfiles = n_files;
	newfdp->fd_afterlast = afterlast;
	newfdp->fd_freefile = freefile;

#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_open = fd_nfiles_open;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	return 0;
}
919
/*
 * fdt_invalidate
 *
 * Description: Tear down the process' descriptor state: deallocate all
 *		knotes and workloops, close every open descriptor, then
 *		release the table arrays, the workq kqueue, the cwd/root
 *		vnode references, and the kqueue hash.
 */
void
fdt_invalidate(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, **ofiles;
	struct kqworkq *kqwq = NULL;
	vnode_t vn1 = NULL, vn2 = NULL;
	struct kqwllist *kqhash = NULL;
	u_long kqhashmask = 0;
	int n_files = 0;

	/*
	 * deallocate all the knotes up front and claim empty
	 * tables to make any subsequent kqueue closes faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/*
	 * dealloc all workloops that have outstanding retains
	 * when created with scheduling parameters.
	 */
	kqworkloops_dealloc(p);

	proc_fdlock(p);

	/* close file descriptors */
	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
		for (int i = fdp->fd_afterlast; i-- > 0;) {
			if ((fp = fdp->fd_ofiles[i]) != NULL) {
				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
					panic("fdfree: found fp with UF_RESERVED");
				}
				/* fp_close_and_unlock() drops the fdlock; retake it */
				fp_close_and_unlock(p, i, fp, 0);
				proc_fdlock(p);
			}
		}
	}

	/* detach everything else from the filedesc while still locked */
	n_files = fdp->fd_nfiles;
	ofiles = fdp->fd_ofiles;
	kqwq = fdp->fd_wqkqueue;
	vn1 = fdp->fd_cdir;
	vn2 = fdp->fd_rdir;

	fdp->fd_ofileflags = NULL;
	fdp->fd_ofiles = NULL;
	fdp->fd_nfiles = 0;
	fdp->fd_wqkqueue = NULL;
	fdp->fd_cdir = NULL;
	fdp->fd_rdir = NULL;

	proc_fdunlock(p);

	lck_mtx_lock(&fdp->fd_knhashlock);

	kqhash = fdp->fd_kqhash;
	kqhashmask = fdp->fd_kqhashmask;

	fdp->fd_kqhash = 0;
	fdp->fd_kqhashmask = 0;

	lck_mtx_unlock(&fdp->fd_knhashlock);

	/* now free everything detached above, outside of any lock */
	kheap_free(KM_OFILETABL, ofiles, n_files * OFILESIZE);

	if (kqwq) {
		kqworkq_dealloc(kqwq);
	}
	if (vn1) {
		vnode_rele(vn1);
	}
	if (vn2) {
		vnode_rele(vn2);
	}
	if (kqhash) {
		/* every hash chain must already be empty */
		for (uint32_t i = 0; i <= kqhashmask; i++) {
			assert(LIST_EMPTY(&kqhash[i]));
		}
		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
	}
}
1003
1004
/*
 * Allocate a zeroed fileproc from its zone (Z_NOFAIL: cannot return
 * NULL) and initialize its I/O reference count to 1.
 */
struct fileproc *
fileproc_alloc_init(void)
{
	struct fileproc *fp;

	fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&fp->fp_iocount, &f_refgrp);
	return fp;
}
1014
1015
/*
 * Release the final I/O reference on a fileproc and return it to its
 * zone.  On DEVELOPMENT/DEBUG kernels, panics if any other reference
 * is still outstanding.
 */
void
fileproc_free(struct fileproc *fp)
{
	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
#if DEVELOPMENT || DEBUG
	if (0 != refc) {
		panic("%s: pid %d refc: %u != 0",
		    __func__, proc_pid(current_proc()), refc);
	}
#endif
	/* drop any guard state before the memory is recycled */
	if (fp->fp_guard_attrs) {
		guarded_fileproc_unguard(fp);
	}
	assert(fp->fp_wset == NULL);
	zfree_id(ZONE_ID_FILEPROC, fp);
}
1032
1033
1034 /*
1035 * Statistics counter for the number of times a process calling fdalloc()
1036 * has resulted in an expansion of the per process open file table.
1037 *
1038 * XXX This would likely be of more use if it were per process
1039 */
1040 int fdexpand;
1041
1042 #if CONFIG_PROC_RESOURCE_LIMITS
1043 /*
1044 * Should be called only with the proc_fdlock held.
1045 */
1046 void
fd_check_limit_exceeded(struct filedesc * fdp)1047 fd_check_limit_exceeded(struct filedesc *fdp)
1048 {
1049 #if DIAGNOSTIC
1050 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1051 #endif
1052 if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
1053 (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
1054 fd_above_soft_limit_send_notification(fdp);
1055 act_set_astproc_resource(current_thread());
1056 } else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
1057 (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
1058 fd_above_hard_limit_send_notification(fdp);
1059 act_set_astproc_resource(current_thread());
1060 }
1061 }
1062 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1063
1064 /*
1065 * fdalloc
1066 *
1067 * Description: Allocate a file descriptor for the process.
1068 *
1069 * Parameters: p Process to allocate the fd in
1070 * want The fd we would prefer to get
1071 * result Pointer to fd we got
1072 *
1073 * Returns: 0 Success
1074 * EMFILE
1075 * ENOMEM
1076 *
1077 * Implicit returns:
1078 * *result (modified) The fd which was allocated
1079 */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = &p->p_fd;
	int i;
	int last, numfiles, oldnfiles;
	struct fileproc **newofiles, **ofiles;
	char *newofileflags;
	int lim = proc_limitgetcur_nofile(p);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	for (;;) {
		/* never scan past the smaller of table size and limit */
		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
		if ((i = want) < fdp->fd_freefile) {
			i = fdp->fd_freefile;
		}
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				/* found one: reserve it until the caller publishes an fp */
				procfdtbl_reservefd(p, i);
				if (i >= fdp->fd_afterlast) {
					fdp->fd_afterlast = i + 1;
				}
				if (want <= fdp->fd_freefile) {
					fdp->fd_freefile = i;
				}
				*result = i;
#if CONFIG_PROC_RESOURCE_LIMITS
				fdp->fd_nfiles_open++;
				fd_check_limit_exceeded(fdp);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
				return 0;
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if ((rlim_t)fdp->fd_nfiles >= lim) {
			return EMFILE;
		}
		if (fdp->fd_nfiles < NDEXTENT) {
			numfiles = NDEXTENT;
		} else {
			numfiles = 2 * fdp->fd_nfiles;
		}
		/* Enforce lim */
		if ((rlim_t)numfiles > lim) {
			numfiles = (int)lim;
		}
		/* drop the lock across the (possibly blocking) allocation */
		proc_fdunlock(p);
		newofiles = kheap_alloc(KM_OFILETABL, numfiles * OFILESIZE,
		    Z_WAITOK);
		proc_fdlock(p);
		if (newofiles == NULL) {
			return ENOMEM;
		}
		if (fdp->fd_nfiles >= numfiles) {
			/* someone else grew the table while we slept; retry */
			kheap_free(KM_OFILETABL, newofiles, numfiles * OFILESIZE);
			continue;
		}
		/* flags array lives immediately after the pointer array */
		newofileflags = (char *) &newofiles[numfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		(void) memcpy(newofiles, fdp->fd_ofiles,
		    oldnfiles * sizeof(*fdp->fd_ofiles));
		(void) memset(&newofiles[oldnfiles], 0,
		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

		(void) memcpy(newofileflags, fdp->fd_ofileflags,
		    oldnfiles * sizeof(*fdp->fd_ofileflags));
		(void) memset(&newofileflags[oldnfiles], 0,
		    (numfiles - oldnfiles) *
		    sizeof(*fdp->fd_ofileflags));
		/* swap in the new arrays and free the old ones */
		ofiles = fdp->fd_ofiles;
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		kheap_free(KM_OFILETABL, ofiles, oldnfiles * OFILESIZE);
		fdexpand++;
	}
}
1172
1173
1174 #pragma mark fileprocs
1175
1176 void
fileproc_modify_vflags(struct fileproc * fp,fileproc_vflags_t vflags,boolean_t clearflags)1177 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1178 {
1179 if (clearflags) {
1180 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1181 } else {
1182 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1183 }
1184 }
1185
fileproc_vflags_t
fileproc_get_vflags(struct fileproc *fp)
{
	/* Atomic snapshot of the fileproc's vflags; relaxed ordering. */
	return os_atomic_load(&fp->fp_vflags, relaxed);
}
1191
1192 /*
1193 * falloc_withinit
1194 *
1195 * Create a new open file structure and allocate
1196 * a file descriptor for the process that refers to it.
1197 *
1198 * Returns: 0 Success
1199 *
1200 * Description: Allocate an entry in the per process open file table and
1201 * return the corresponding fileproc and fd.
1202 *
1203 * Parameters: p The process in whose open file
1204 * table the fd is to be allocated
1205 * resultfp Pointer to fileproc pointer
1206 * return area
1207 * resultfd Pointer to fd return area
1208 * ctx VFS context
1209 * fp_zalloc fileproc allocator to use
1210 * crarg allocator args
1211 *
1212 * Returns: 0 Success
1213 * ENFILE Too many open files in system
1214 * fdalloc:EMFILE Too many open files in process
1215 * fdalloc:ENOMEM M_OFILETABL zone exhausted
1216 * ENOMEM fp_zone or fg_zone zone
1217 * exhausted
1218 *
1219 * Implicit returns:
 *		*resultfp (modified)		Returned fileproc pointer
1221 * *resultfd (modified) Returned fd
1222 *
1223 * Notes: This function takes separate process and context arguments
1224 * solely to support kern_exec.c; otherwise, it would take
1225 * neither, and use the vfs_context_current() routine internally.
1226 */
1227 int
falloc_withinit(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx,fp_initfn_t fp_init,void * initarg)1228 falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
1229 vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
1230 {
1231 struct fileproc *fp;
1232 struct fileglob *fg;
1233 int error, nfd;
1234 #if CONFIG_MACF
1235 kauth_cred_t cred;
1236 #endif
1237
1238 /* Make sure we don't go beyond the system-wide limit */
1239 if (nfiles >= maxfiles) {
1240 tablefull("file");
1241 return ENFILE;
1242 }
1243
1244 proc_fdlock(p);
1245
1246 /* fdalloc will make sure the process stays below per-process limit */
1247 if ((error = fdalloc(p, 0, &nfd))) {
1248 proc_fdunlock(p);
1249 return error;
1250 }
1251
1252 #if CONFIG_MACF
1253 cred = kauth_cred_proc_ref(p);
1254 error = mac_file_check_create(cred);
1255 kauth_cred_unref(&cred);
1256 if (error) {
1257 proc_fdunlock(p);
1258 return error;
1259 }
1260 #endif
1261
1262 /*
1263 * Allocate a new file descriptor.
1264 * If the process has file descriptor zero open, add to the list
1265 * of open files at that point, otherwise put it at the front of
1266 * the list of open files.
1267 */
1268 proc_fdunlock(p);
1269
1270 fp = fileproc_alloc_init();
1271 if (fp_init) {
1272 fp_init(fp, initarg);
1273 }
1274
1275 fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
1276 lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
1277
1278 os_ref_retain_locked(&fp->fp_iocount);
1279 os_ref_init_raw(&fg->fg_count, &f_refgrp);
1280 fg->fg_ops = &uninitops;
1281 fp->fp_glob = fg;
1282
1283 kauth_cred_ref(ctx->vc_ucred);
1284
1285 fp->f_cred = ctx->vc_ucred;
1286
1287 os_atomic_inc(&nfiles, relaxed);
1288
1289 proc_fdlock(p);
1290
1291 p->p_fd.fd_ofiles[nfd] = fp;
1292
1293 proc_fdunlock(p);
1294
1295 if (resultfp) {
1296 *resultfp = fp;
1297 }
1298 if (resultfd) {
1299 *resultfd = nfd;
1300 }
1301
1302 return 0;
1303 }
1304
int
falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
{
	/* Convenience wrapper: falloc_withinit() without an init callback. */
	return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
}
1310
1311
1312 /*
1313 * fp_free
1314 *
1315 * Description: Release the fd and free the fileproc associated with the fd
1316 * in the per process open file table of the specified process;
1317 * these values must correspond.
1318 *
1319 * Parameters: p Process containing fd
1320 * fd fd to be released
1321 * fp fileproc to be freed
1322 */
void
fp_free(proc_t p, int fd, struct fileproc * fp)
{
	proc_fdlock_spin(p);
	/* Return the descriptor slot to the table. */
	fdrelse(p, fd);
	proc_fdunlock(p);

	/*
	 * Free the backing fileglob, drop the allocation-time I/O
	 * reference, then free the fileproc itself.
	 */
	fg_free(fp->fp_glob);
	os_ref_release_live(&fp->fp_iocount);
	fileproc_free(fp);
}
1334
1335
1336 struct fileproc *
fp_get_noref_locked(proc_t p,int fd)1337 fp_get_noref_locked(proc_t p, int fd)
1338 {
1339 struct filedesc *fdp = &p->p_fd;
1340 struct fileproc *fp;
1341
1342 if (fd < 0 || fd >= fdp->fd_nfiles ||
1343 (fp = fdp->fd_ofiles[fd]) == NULL ||
1344 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1345 return NULL;
1346 }
1347
1348 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1349 return fp;
1350 }
1351
struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp = NULL;

	/*
	 * The caller asserts that it already holds an I/O reference on this
	 * fileproc. A count <= 1 means no caller-held reference can exist,
	 * and a reserved-but-not-closing slot means the entry is in flux;
	 * either situation indicates memory-corruption-grade misuse, so
	 * panic rather than return.
	 */
	if (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    os_ref_get_count(&fp->fp_iocount) <= 1 ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
		    __func__, fd, fp);
	}

	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
	return fp;
}
1370
1371
1372 /*
1373 * fp_lookup
1374 *
1375 * Description: Get fileproc pointer for a given fd from the per process
1376 * open file table of the specified process and if successful,
1377 * increment the fp_iocount
1378 *
1379 * Parameters: p Process in which fd lives
1380 * fd fd to get information for
1381 * resultfp Pointer to result fileproc
1382 * pointer area, or 0 if none
1383 * locked !0 if the caller holds the
1384 * proc_fdlock, 0 otherwise
1385 *
1386 * Returns: 0 Success
1387 * EBADF Bad file descriptor
1388 *
1389 * Implicit returns:
1390 * *resultfp (modified) Fileproc pointer
1391 *
1392 * Locks: If the argument 'locked' is non-zero, then the caller is
1393 * expected to have taken and held the proc_fdlock; if it is
1394 * zero, than this routine internally takes and drops this lock.
1395 */
1396 int
fp_lookup(proc_t p,int fd,struct fileproc ** resultfp,int locked)1397 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1398 {
1399 struct filedesc *fdp = &p->p_fd;
1400 struct fileproc *fp;
1401
1402 if (!locked) {
1403 proc_fdlock_spin(p);
1404 }
1405 if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1406 (fp = fdp->fd_ofiles[fd]) == NULL ||
1407 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1408 if (!locked) {
1409 proc_fdunlock(p);
1410 }
1411 return EBADF;
1412 }
1413
1414 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1415 os_ref_retain_locked(&fp->fp_iocount);
1416
1417 if (resultfp) {
1418 *resultfp = fp;
1419 }
1420 if (!locked) {
1421 proc_fdunlock(p);
1422 }
1423
1424 return 0;
1425 }
1426
1427
1428 int
fp_get_ftype(proc_t p,int fd,file_type_t ftype,int err,struct fileproc ** fpp)1429 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1430 {
1431 struct filedesc *fdp = &p->p_fd;
1432 struct fileproc *fp;
1433
1434 proc_fdlock_spin(p);
1435 if (fd < 0 || fd >= fdp->fd_nfiles ||
1436 (fp = fdp->fd_ofiles[fd]) == NULL ||
1437 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1438 proc_fdunlock(p);
1439 return EBADF;
1440 }
1441
1442 if (fp->f_type != ftype) {
1443 proc_fdunlock(p);
1444 return err;
1445 }
1446
1447 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1448 os_ref_retain_locked(&fp->fp_iocount);
1449 proc_fdunlock(p);
1450
1451 *fpp = fp;
1452 return 0;
1453 }
1454
1455
1456 /*
1457 * fp_drop
1458 *
1459 * Description: Drop the I/O reference previously taken by calling fp_lookup
1460 * et. al.
1461 *
1462 * Parameters: p Process in which the fd lives
1463 * fd fd associated with the fileproc
1464 * fp fileproc on which to set the
1465 * flag and drop the reference
1466 * locked flag to internally take and
1467 * drop proc_fdlock if it is not
1468 * already held by the caller
1469 *
1470 * Returns: 0 Success
1471 * EBADF Bad file descriptor
1472 *
 * Locks:	This function internally takes and drops the proc_fdlock for
 *		the supplied process if 'locked' is zero, and assumes that
 *		the caller already holds this lock if 'locked' is non-zero.
1476 *
1477 * Notes: The fileproc must correspond to the fd in the supplied proc
1478 */
int
fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
{
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * If the caller passed FILEPROC_NULL, resolve fp from fd; in that
	 * case a slot that is reserved but not in the middle of closing
	 * (UF_CLOSING) is invalid.
	 */
	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/*
	 * If the count drops to 1, only the close path's reference remains;
	 * wake any thread parked in fileproc_drain() (which waits for
	 * count == 1) and clear the select-conflict marker.
	 */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	if (!locked) {
		proc_fdunlock(p);
	}
	/* Issue the wakeup after dropping the spin lock. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}

	return 0;
}
1517
1518
1519 /*
1520 * fileproc_drain
1521 *
1522 * Description: Drain out pending I/O operations
1523 *
1524 * Parameters: p Process closing this file
1525 * fp fileproc struct for the open
1526 * instance on the file
1527 *
1528 * Returns: void
1529 *
1530 * Locks: Assumes the caller holds the proc_fdlock
1531 *
1532 * Notes: For character devices, this occurs on the last close of the
1533 * device; for all other file descriptors, this occurs on each
1534 * close to prevent fd's from being closed out from under
1535 * operations currently in progress and blocked
1536 *
1537 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
1538 * regarding their use and interaction with this function.
1539 */
static void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct filedesc *fdp = &p->p_fd;
	struct vfs_context context;
	thread_t thread;
	bool is_current_proc;

	is_current_proc = (p == current_proc());

	if (!is_current_proc) {
		/*
		 * Draining another process's fd: borrow a reference on one
		 * of its threads to build the vfs_context below.
		 */
		proc_lock(p);
		thread = proc_thread(p);        /* XXX */
		thread_reference(thread);
		proc_unlock(p);
	} else {
		thread = current_thread();
	}

	context.vc_thread = thread;
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* Set the vflag for drain */
	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);

	/*
	 * Wait until ours is the only remaining I/O reference: each pass
	 * kicks blocked I/O (fo_drain) and interrupts any select() waiters,
	 * then sleeps until fp_drop()/file_drop() signals fd_fpdrainwait.
	 */
	while (os_ref_get_count(&fp->fp_iocount) > 1) {
		lck_mtx_convert_spin(&fdp->fd_lock);

		fo_drain(fp, &context);
		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
			struct select_set *selset;

			/* Guarded fileprocs keep their select set in the guard. */
			if (fp->fp_guard_attrs) {
				selset = fp->fp_guard->fpg_wset;
			} else {
				selset = fp->fp_wset;
			}
			if (waitq_wakeup64_all(selset, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
				    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
			}
		}
		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad select_conflict_queue");
			}
		}
		fdp->fd_fpdrainwait = 1;
		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
	}
#if DIAGNOSTIC
	if ((fp->fp_flags & FP_INSELECT) != 0) {
		panic("FP_INSELECT set on drained fp");
	}
#endif
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		fp->fp_flags &= ~FP_SELCONFLICT;
	}

	if (!is_current_proc) {
		thread_deallocate(thread);
	}
}
1605
1606
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileglob *fg = fp->fp_glob;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/*
	 * Drop the fdlock for the potentially blocking close-notification
	 * work (MAC/kauth callouts, AIO cancellation) and retake it after.
	 */
	if ((fp->fp_flags & FP_AIOISSUED) ||
#if CONFIG_MACF
	    (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
#else
	    kauth_authorize_fileop_has_listeners()
#endif
	    ) {
		proc_fdunlock(p);

		if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
#if CONFIG_MACF
			cred = kauth_cred_proc_ref(p);
			mac_file_notify_close(cred, fp->fp_glob);
			kauth_cred_unref(&cred);
#endif

			if (kauth_authorize_fileop_has_listeners() &&
			    vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
				u_int fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);

				vnode_put((vnode_t)fg_get_data(fg));
			}
		}

		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes registered on this descriptor. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	/* Block until all outstanding I/O references are gone. */
	fileproc_drain(p, fp);

	if (flags & FD_DUP2RESV) {
		/* dup2() keeps the slot reserved for immediate reuse. */
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
	}

	fileproc_free(fp);

	/* Drop the fileglob reference; may run the file's close op. */
	return fg_drop(p, fg);
}
1714
1715
1716 /*
1717 * dupfdopen
1718 *
1719 * Description: Duplicate the specified descriptor to a free descriptor;
1720 * this is the second half of fdopen(), above.
1721 *
1722 * Parameters: p current process pointer
1723 * indx fd to dup to
1724 * dfd fd to dup from
 *		flags			open mode to check on new fd
1726 * error command code
1727 *
1728 * Returns: 0 Success
1729 * EBADF Source fd is bad
1730 * EACCES Requested mode not allowed
1731 * !0 'error', if not ENODEV or
1732 * ENXIO
1733 *
1734 * Notes: XXX This is not thread safe; see fdopen() above
1735 */
int
dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *wfp;
	struct fileproc *fp;
#if CONFIG_MACF
	int myerror;
#endif

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, reject. Note, check for new == old is necessary as
	 * falloc could allocate an already closed to-be-dup'd descriptor
	 * as the new descriptor.
	 */
	proc_fdlock(p);

	fp = fdp->fd_ofiles[indx];
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
		proc_fdunlock(p);
		return EBADF;
	}
#if CONFIG_MACF
	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
	if (myerror) {
		proc_fdunlock(p);
		return myerror;
	}
#endif
	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx). (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 *
	 * NOTE(review): the ENXIO "steal" behavior described above is not
	 * implemented by the switch below — ENXIO falls through to the
	 * default case and is returned to the caller unchanged. Confirm
	 * whether the comment is stale.
	 */
	switch (error) {
	case ENODEV:
		/* Guarded descriptors refuse implicit duplication. */
		if (fp_isguarded(wfp, GUARD_DUP)) {
			proc_fdunlock(p);
			return EPERM;
		}

		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			proc_fdunlock(p);
			return EACCES;
		}
		if (indx >= fdp->fd_afterlast) {
			fdp->fd_afterlast = indx + 1;
		}

		/* Release any fileglob already attached to the target slot. */
		if (fp->fp_glob) {
			fg_free(fp->fp_glob);
		}
		fg_ref(p, wfp->fp_glob);
		fp->fp_glob = wfp->fp_glob;
		/*
		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
		 * unlike dup(), dup2() or fcntl(F_DUPFD).
		 *
		 * open1() already handled O_CLO{EXEC,FORK}
		 */
		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));

		/* Clear UF_RESERVED on the slot and drop our I/O reference. */
		procfdtbl_releasefd(p, indx, NULL);
		fp_drop(p, indx, fp, 1);
		proc_fdunlock(p);
		return 0;

	default:
		proc_fdunlock(p);
		return error;
	}
	/* NOTREACHED */
}
1824
1825
1826 #pragma mark KPIS (sys/file.h)
1827
1828 /*
1829 * fg_get_vnode
1830 *
1831 * Description: Return vnode associated with the file structure, if
1832 * any. The lifetime of the returned vnode is bound to
1833 * the lifetime of the file structure.
1834 *
1835 * Parameters: fg Pointer to fileglob to
1836 * inspect
1837 *
1838 * Returns: vnode_t
1839 */
1840 vnode_t
fg_get_vnode(struct fileglob * fg)1841 fg_get_vnode(struct fileglob *fg)
1842 {
1843 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1844 return (vnode_t)fg_get_data(fg);
1845 } else {
1846 return NULL;
1847 }
1848 }
1849
1850
1851 /*
1852 * fp_getfvp
1853 *
1854 * Description: Get fileproc and vnode pointer for a given fd from the per
1855 * process open file table of the specified process, and if
1856 * successful, increment the fp_iocount
1857 *
1858 * Parameters: p Process in which fd lives
1859 * fd fd to get information for
1860 * resultfp Pointer to result fileproc
1861 * pointer area, or 0 if none
1862 * resultvp Pointer to result vnode pointer
1863 * area, or 0 if none
1864 *
1865 * Returns: 0 Success
1866 * EBADF Bad file descriptor
1867 * ENOTSUP fd does not refer to a vnode
1868 *
1869 * Implicit returns:
1870 * *resultfp (modified) Fileproc pointer
1871 * *resultvp (modified) vnode pointer
1872 *
1873 * Notes: The resultfp and resultvp fields are optional, and may be
1874 * independently specified as NULL to skip returning information
1875 *
1876 * Locks: Internally takes and releases proc_fdlock
1877 */
1878 int
fp_getfvp(proc_t p,int fd,struct fileproc ** resultfp,struct vnode ** resultvp)1879 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1880 {
1881 struct fileproc *fp;
1882 int error;
1883
1884 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1885 if (error == 0) {
1886 if (resultfp) {
1887 *resultfp = fp;
1888 }
1889 if (resultvp) {
1890 *resultvp = (struct vnode *)fp_get_data(fp);
1891 }
1892 }
1893
1894 return error;
1895 }
1896
1897
1898 /*
1899 * fp_get_pipe_id
1900 *
1901 * Description: Get pipe id for a given fd from the per process open file table
1902 * of the specified process.
1903 *
1904 * Parameters: p Process in which fd lives
1905 * fd fd to get information for
1906 * result_pipe_id Pointer to result pipe id
1907 *
1908 * Returns: 0 Success
 *		EINVAL			NULL pointer arguments passed
1910 * fp_lookup:EBADF Bad file descriptor
1911 * ENOTSUP fd does not refer to a pipe
1912 *
1913 * Implicit returns:
1914 * *result_pipe_id (modified) pipe id
1915 *
1916 * Locks: Internally takes and releases proc_fdlock
1917 */
1918 int
fp_get_pipe_id(proc_t p,int fd,uint64_t * result_pipe_id)1919 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1920 {
1921 struct fileproc *fp = FILEPROC_NULL;
1922 struct fileglob *fg = NULL;
1923 int error = 0;
1924
1925 if (p == NULL || result_pipe_id == NULL) {
1926 return EINVAL;
1927 }
1928
1929 proc_fdlock(p);
1930 if ((error = fp_lookup(p, fd, &fp, 1))) {
1931 proc_fdunlock(p);
1932 return error;
1933 }
1934 fg = fp->fp_glob;
1935
1936 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
1937 *result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
1938 } else {
1939 error = ENOTSUP;
1940 }
1941
1942 fp_drop(p, fd, fp, 1);
1943 proc_fdunlock(p);
1944 return error;
1945 }
1946
1947
1948 /*
1949 * file_vnode
1950 *
1951 * Description: Given an fd, look it up in the current process's per process
1952 * open file table, and return its internal vnode pointer.
1953 *
1954 * Parameters: fd fd to obtain vnode from
1955 * vpp pointer to vnode return area
1956 *
1957 * Returns: 0 Success
1958 * EINVAL The fd does not refer to a
1959 * vnode fileproc entry
1960 * fp_lookup:EBADF Bad file descriptor
1961 *
1962 * Implicit returns:
1963 * *vpp (modified) Returned vnode pointer
1964 *
1965 * Locks: This function internally takes and drops the proc_fdlock for
1966 * the current process
1967 *
1968 * Notes: If successful, this function increments the fp_iocount on the
1969 * fd's corresponding fileproc.
1970 *
1971 * The fileproc referenced is not returned; because of this, care
1972 * must be taken to not drop the last reference (e.g. by closing
1973 * the file). This is inherently unsafe, since the reference may
1974 * not be recoverable from the vnode, if there is a subsequent
1975 * close that destroys the associate fileproc. The caller should
1976 * therefore retain their own reference on the fileproc so that
1977 * the fp_iocount can be dropped subsequently. Failure to do this
1978 * can result in the returned pointer immediately becoming invalid
1979 * following the call.
1980 *
1981 * Use of this function is discouraged.
1982 */
int
file_vnode(int fd, struct vnode **vpp)
{
	/* Thin wrapper: file_vnode_withvid() without returning the vid. */
	return file_vnode_withvid(fd, vpp, NULL);
}
1988
1989
1990 /*
1991 * file_vnode_withvid
1992 *
1993 * Description: Given an fd, look it up in the current process's per process
1994 * open file table, and return its internal vnode pointer.
1995 *
1996 * Parameters: fd fd to obtain vnode from
1997 * vpp pointer to vnode return area
1998 * vidp pointer to vid of the returned vnode
1999 *
2000 * Returns: 0 Success
2001 * EINVAL The fd does not refer to a
2002 * vnode fileproc entry
2003 * fp_lookup:EBADF Bad file descriptor
2004 *
2005 * Implicit returns:
2006 * *vpp (modified) Returned vnode pointer
2007 *
2008 * Locks: This function internally takes and drops the proc_fdlock for
2009 * the current process
2010 *
2011 * Notes: If successful, this function increments the fp_iocount on the
2012 * fd's corresponding fileproc.
2013 *
2014 * The fileproc referenced is not returned; because of this, care
2015 * must be taken to not drop the last reference (e.g. by closing
2016 * the file). This is inherently unsafe, since the reference may
2017 * not be recoverable from the vnode, if there is a subsequent
2018 * close that destroys the associate fileproc. The caller should
2019 * therefore retain their own reference on the fileproc so that
2020 * the fp_iocount can be dropped subsequently. Failure to do this
2021 * can result in the returned pointer immediately becoming invalid
2022 * following the call.
2023 *
2024 * Use of this function is discouraged.
2025 */
2026 int
file_vnode_withvid(int fd,struct vnode ** vpp,uint32_t * vidp)2027 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2028 {
2029 struct fileproc *fp;
2030 int error;
2031
2032 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2033 if (error == 0) {
2034 if (vpp) {
2035 *vpp = (struct vnode *)fp_get_data(fp);
2036 }
2037 if (vidp) {
2038 *vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2039 }
2040 }
2041 return error;
2042 }
2043
2044 /*
2045 * file_socket
2046 *
2047 * Description: Given an fd, look it up in the current process's per process
2048 * open file table, and return its internal socket pointer.
2049 *
2050 * Parameters: fd fd to obtain vnode from
2051 * sp pointer to socket return area
2052 *
2053 * Returns: 0 Success
2054 * ENOTSOCK Not a socket
2055 * fp_lookup:EBADF Bad file descriptor
2056 *
2057 * Implicit returns:
2058 * *sp (modified) Returned socket pointer
2059 *
2060 * Locks: This function internally takes and drops the proc_fdlock for
2061 * the current process
2062 *
2063 * Notes: If successful, this function increments the fp_iocount on the
2064 * fd's corresponding fileproc.
2065 *
2066 * The fileproc referenced is not returned; because of this, care
2067 * must be taken to not drop the last reference (e.g. by closing
2068 * the file). This is inherently unsafe, since the reference may
2069 * not be recoverable from the socket, if there is a subsequent
2070 * close that destroys the associate fileproc. The caller should
2071 * therefore retain their own reference on the fileproc so that
2072 * the fp_iocount can be dropped subsequently. Failure to do this
2073 * can result in the returned pointer immediately becoming invalid
2074 * following the call.
2075 *
2076 * Use of this function is discouraged.
2077 */
2078 int
file_socket(int fd,struct socket ** sp)2079 file_socket(int fd, struct socket **sp)
2080 {
2081 struct fileproc *fp;
2082 int error;
2083
2084 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2085 if (error == 0) {
2086 if (sp) {
2087 *sp = (struct socket *)fp_get_data(fp);
2088 }
2089 }
2090 return error;
2091 }
2092
2093
2094 /*
2095 * file_flags
2096 *
2097 * Description: Given an fd, look it up in the current process's per process
2098 * open file table, and return its fileproc's flags field.
2099 *
2100 * Parameters: fd fd whose flags are to be
2101 * retrieved
2102 * flags pointer to flags data area
2103 *
 * Returns:	0			Success
 *		EBADF			Bad file descriptor
2107 *
2108 * Implicit returns:
2109 * *flags (modified) Returned flags field
2110 *
2111 * Locks: This function internally takes and drops the proc_fdlock for
2112 * the current process
2113 */
2114 int
file_flags(int fd,int * flags)2115 file_flags(int fd, int *flags)
2116 {
2117 proc_t p = current_proc();
2118 struct fileproc *fp;
2119 int error = EBADF;
2120
2121 proc_fdlock_spin(p);
2122 fp = fp_get_noref_locked(p, fd);
2123 if (fp) {
2124 *flags = (int)fp->f_flag;
2125 error = 0;
2126 }
2127 proc_fdunlock(p);
2128
2129 return error;
2130 }
2131
2132
2133 /*
2134 * file_drop
2135 *
2136 * Description: Drop an iocount reference on an fd, and wake up any waiters
2137 * for draining (i.e. blocked in fileproc_drain() called during
2138 * the last attempt to close a file).
2139 *
2140 * Parameters: fd fd on which an ioreference is
2141 * to be dropped
2142 *
2143 * Returns: 0 Success
2144 *
2145 * Description: Given an fd, look it up in the current process's per process
2146 * open file table, and drop it's fileproc's fp_iocount by one
2147 *
2148 * Notes: This is intended as a corresponding operation to the functions
2149 * file_vnode() and file_socket() operations.
2150 *
2151 * If the caller can't possibly hold an I/O reference,
2152 * this function will panic the kernel rather than allowing
2153 * for memory corruption. Callers should always call this
2154 * because they acquired an I/O reference on this file before.
2155 *
2156 * Use of this function is discouraged.
2157 */
int
file_drop(int fd)
{
	struct fileproc *fp;
	proc_t p = current_proc();
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	proc_fdlock_spin(p);
	/* Panics if the caller does not actually hold an I/O reference. */
	fp = fp_get_noref_locked_with_iocount(p, fd);

	/*
	 * If the count drops to 1, only the close path's reference remains;
	 * wake any thread parked in fileproc_drain() (which waits for
	 * count == 1) and clear the select-conflict marker.
	 */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	proc_fdunlock(p);

	/* Issue the wakeup after dropping the spin lock. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}
	return 0;
}
2186
2187
/*
 * fd_rdwr
 *
 * Description:	Perform a read or write against the object backing fd
 *		(vnode, pipe, or socket) at the given offset, from/to a
 *		user- or kernel-space buffer.
 *
 * Parameters:	fd		descriptor to read/write
 *		rw		UIO_READ or UIO_WRITE
 *		base		buffer address
 *		len		number of bytes
 *		segflg		address space of 'base'
 *		offset		file offset (ignored with IO_APPEND)
 *		io_flg		IO_* flags (only IO_APPEND inspected here)
 *		aresid		optional: residual byte count on return
 *
 * Returns:	0		Success
 *		EINVAL		fd is not a vnode, pipe, or socket
 *		EBADF		fd not open for the requested direction
 *		EIO		short transfer and aresid was NULL
 *	fp_lookup:EBADF		Bad file descriptor
 */
int
fd_rdwr(
	int fd,
	enum uio_rw rw,
	uint64_t base,
	int64_t len,
	enum uio_seg segflg,
	off_t offset,
	int io_flg,
	int64_t *aresid)
{
	struct fileproc *fp;
	proc_t p;
	int error = 0;
	int flags = 0;
	int spacetype;
	uio_t auio = NULL;
	uio_stackbuf_t uio_buf[UIO_SIZEOF(1)];
	struct vfs_context context = *(vfs_context_current());

	p = current_proc();

	/* Take an I/O reference for the duration of the transfer. */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		return error;
	}

	/* Only vnodes, pipes and sockets are supported here. */
	switch (FILEGLOB_DTYPE(fp->fp_glob)) {
	case DTYPE_VNODE:
	case DTYPE_PIPE:
	case DTYPE_SOCKET:
		break;
	default:
		error = EINVAL;
		goto out;
	}
	/* The descriptor must be open in the requested direction. */
	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
		error = EBADF;
		goto out;
	}

	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
		error = EBADF;
		goto out;
	}

	/* Perform the I/O with the file's opening credential. */
	context.vc_ucred = fp->fp_glob->fg_cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	} else {
		spacetype = UIO_SYSSPACE;
	}

	/* Build a single-iovec uio on the stack buffer. */
	auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));

	uio_addiov(auio, (user_addr_t)base, (user_size_t)len);

	/* Unless appending, honor the explicit offset. */
	if (!(io_flg & IO_APPEND)) {
		flags = FOF_OFFSET;
	}

	if (rw == UIO_WRITE) {
		user_ssize_t orig_resid = uio_resid(auio);
		error = fo_write(fp, auio, flags, &context);
		/* Any bytes actually written mark the file as modified. */
		if (uio_resid(auio) < orig_resid) {
			os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
		}
	} else {
		error = fo_read(fp, auio, flags, &context);
	}

	/* Report a short transfer via aresid, or as EIO if not requested. */
	if (aresid) {
		*aresid = uio_resid(auio);
	} else if (uio_resid(auio) && error == 0) {
		error = EIO;
	}
out:
	fp_drop(p, fd, fp, 0);
	return error;
}
2269
2270
2271 #pragma mark syscalls
2272
2273 #ifndef HFS_GET_BOOT_INFO
2274 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
2275 #endif
2276
2277 #ifndef HFS_SET_BOOT_INFO
2278 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
2279 #endif
2280
2281 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
2282 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
2283 #endif
2284
/*
 * Evaluates to 1 when computing (x) + (y) in signed 64-bit arithmetic would
 * overflow (both positive and the sum would exceed LLONG_MAX) or underflow
 * (both negative and the sum would fall below LLONG_MIN); 0 otherwise.
 * Arguments are evaluated multiple times — pass side-effect-free expressions.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	(((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
	(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
	? 1 : 0)
2289
2290 /*
2291 * sys_getdtablesize
2292 *
2293 * Description: Returns the per process maximum size of the descriptor table
2294 *
2295 * Parameters: p Process being queried
2296 * retval Pointer to the call return area
2297 *
2298 * Returns: 0 Success
2299 *
2300 * Implicit returns:
2301 * *retval (modified) Size of dtable
2302 */
2303 int
sys_getdtablesize(proc_t p,__unused struct getdtablesize_args * uap,int32_t * retval)2304 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2305 {
2306 *retval = proc_limitgetcur_nofile(p);
2307 return 0;
2308 }
2309
2310
2311 /*
2312 * check_file_seek_range
2313 *
2314 * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
2315 *
2316 * Parameters: fl Flock structure.
2317 * cur_file_offset Current offset in the file.
2318 *
2319 * Returns: 0 on Success.
2320 * EOVERFLOW on overflow.
2321 * EINVAL on offset less than zero.
2322 */
2323
static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	/*
	 * Validate the lock range [start, start + len) described by `fl`
	 * against the current file offset. SEEK_END offsets are not
	 * validated here. Returns 0, EINVAL, or EOVERFLOW (see header).
	 */
	if (fl->l_whence == SEEK_CUR) {
		/* Check if the start marker is beyond LLONG_MAX. */
		if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
			/*
			 * Overflow with both addends negative means the
			 * effective start would be negative: EINVAL, not
			 * EOVERFLOW.
			 */
			/* Check if start marker is negative */
			if (fl->l_start < 0) {
				return EINVAL;
			}
			return EOVERFLOW;
		}
		/* Check if the start marker is negative. */
		if (fl->l_start + cur_file_offset < 0) {
			return EINVAL;
		}
		/* Check if end marker is beyond LLONG_MAX. */
		if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
		    cur_file_offset, fl->l_len - 1))) {
			return EOVERFLOW;
		}
		/*
		 * Check if the end marker is negative. (l_len <= 0 means the
		 * range extends backwards from the start position.)
		 */
		if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
		    fl->l_len < 0)) {
			return EINVAL;
		}
	} else if (fl->l_whence == SEEK_SET) {
		/* Check if the start marker is negative. */
		if (fl->l_start < 0) {
			return EINVAL;
		}
		/* Check if the end marker is beyond LLONG_MAX. */
		if ((fl->l_len > 0) &&
		    CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* Check if the end marker is negative. */
		if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
			return EINVAL;
		}
	}
	return 0;
}
2367
2368
2369 /*
2370 * sys_dup
2371 *
2372 * Description: Duplicate a file descriptor.
2373 *
2374 * Parameters: p Process performing the dup
2375 * uap->fd The fd to dup
2376 * retval Pointer to the call return area
2377 *
2378 * Returns: 0 Success
2379 * !0 Errno
2380 *
2381 * Implicit returns:
2382 * *retval (modified) The new descriptor
2383 */
2384 int
sys_dup(proc_t p,struct dup_args * uap,int32_t * retval)2385 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
2386 {
2387 struct filedesc *fdp = &p->p_fd;
2388 int old = uap->fd;
2389 int new, error;
2390 struct fileproc *fp;
2391
2392 proc_fdlock(p);
2393 if ((error = fp_lookup(p, old, &fp, 1))) {
2394 proc_fdunlock(p);
2395 return error;
2396 }
2397 if (fp_isguarded(fp, GUARD_DUP)) {
2398 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2399 (void) fp_drop(p, old, fp, 1);
2400 proc_fdunlock(p);
2401 return error;
2402 }
2403 if ((error = fdalloc(p, 0, &new))) {
2404 fp_drop(p, old, fp, 1);
2405 proc_fdunlock(p);
2406 return error;
2407 }
2408 error = finishdup(p, fdp, old, new, 0, retval);
2409 fp_drop(p, old, fp, 1);
2410 proc_fdunlock(p);
2411
2412 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
2413 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
2414 new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
2415 }
2416
2417 return error;
2418 }
2419
2420 /*
2421 * sys_dup2
2422 *
2423 * Description: Duplicate a file descriptor to a particular value.
2424 *
2425 * Parameters: p Process performing the dup
2426 * uap->from The fd to dup
2427 * uap->to The fd to dup it to
2428 * retval Pointer to the call return area
2429 *
2430 * Returns: 0 Success
2431 * !0 Errno
2432 *
2433 * Implicit returns:
2434 * *retval (modified) The new descriptor
2435 */
2436 int
sys_dup2(proc_t p,struct dup2_args * uap,int32_t * retval)2437 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2438 {
2439 return dup2(p, uap->from, uap->to, retval);
2440 }
2441
/*
 * dup2
 *
 * Duplicate descriptor `old` onto descriptor number `new`, closing whatever
 * `new` currently refers to. On success *retval is set to `new`. Called with
 * the proc fdlock not held; it is taken and released here.
 */
int
dup2(proc_t p, int old, int new, int *retval)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;

	proc_fdlock(p);

startover:
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors refuse dup; raise the guard exception instead */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* Target must fall below the process's open-file limit */
	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* dup2(fd, fd) is a no-op that simply reports the fd back */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* The table must grow to cover `new`; fdalloc grows it */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		/*
		 * fdalloc handed back some other slot: release it and fall
		 * into the close-and-reserve path for `new` below.
		 */
		if (new != i) {
			fdrelse(p, i);
			goto closeit;
		}
	} else {
closeit:
		/*
		 * Another thread holds `new` reserved (mid open/close):
		 * wait for it to settle, then restart from the top, since
		 * waiting may have dropped the fdlock.
		 */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* Close-guarded victims cannot be implicitly closed */
			if (fp_isguarded(nfp, GUARD_CLOSE)) {
				fp_drop(p, old, fp, 1);
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/* Close the occupant, leaving `new` reserved for us */
			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			procfdtbl_reservefd(p, new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	/* Slot `new` is now empty and reserved: point it at old's file */
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
2528
2529
2530 /*
2531 * fcntl
2532 *
2533 * Description: The file control system call.
2534 *
2535 * Parameters: p Process performing the fcntl
2536 * uap->fd The fd to operate against
2537 * uap->cmd The command to perform
2538 * uap->arg Pointer to the command argument
2539 * retval Pointer to the call return area
2540 *
2541 * Returns: 0 Success
2542 * !0 Errno (see fcntl_nocancel)
2543 *
2544 * Implicit returns:
2545 * *retval (modified) fcntl return value (if any)
2546 *
2547 * Notes: This system call differs from fcntl_nocancel() in that it
2548 * tests for cancellation prior to performing a potentially
2549 * blocking operation.
2550 */
int
sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
{
	/* Honor pending cancellation before a potentially blocking fcntl */
	__pthread_testcancel(1);
	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
}
2557
2558 #define ACCOUNT_OPENFROM_ENTITLEMENT \
2559 "com.apple.private.vfs.role-account-openfrom"
2560
2561 /*
2562 * sys_fcntl_nocancel
2563 *
2564 * Description: A non-cancel-testing file control system call.
2565 *
2566 * Parameters: p Process performing the fcntl
2567 * uap->fd The fd to operate against
2568 * uap->cmd The command to perform
2569 * uap->arg Pointer to the command argument
2570 * retval Pointer to the call return area
2571 *
2572 * Returns: 0 Success
2573 * EINVAL
2574 * fp_lookup:EBADF Bad file descriptor
2575 * [F_DUPFD]
2576 * fdalloc:EMFILE
2577 * fdalloc:ENOMEM
2578 * finishdup:EBADF
2579 * finishdup:ENOMEM
2580 * [F_SETOWN]
2581 * ESRCH
2582 * [F_SETLK]
2583 * EBADF
2584 * EOVERFLOW
2585 * copyin:EFAULT
2586 * vnode_getwithref:???
2587 * VNOP_ADVLOCK:???
2588 * msleep:ETIMEDOUT
2589 * [F_GETLK]
2590 * EBADF
2591 * EOVERFLOW
2592 * copyin:EFAULT
2593 * copyout:EFAULT
2594 * vnode_getwithref:???
2595 * VNOP_ADVLOCK:???
2596 * [F_PREALLOCATE]
2597 * EBADF
2598 * EINVAL
2599 * copyin:EFAULT
2600 * copyout:EFAULT
2601 * vnode_getwithref:???
2602 * VNOP_ALLOCATE:???
2603 * [F_SETSIZE,F_RDADVISE]
2604 * EBADF
2605 * EINVAL
2606 * copyin:EFAULT
2607 * vnode_getwithref:???
2608 * [F_RDAHEAD,F_NOCACHE]
2609 * EBADF
2610 * vnode_getwithref:???
2611 * [???]
2612 *
2613 * Implicit returns:
2614 * *retval (modified) fcntl return value (if any)
2615 */
/*
 * Declares a local struct vfs_context built from the current thread and the
 * credential of `fp`, which must be in scope at the expansion site.
 */
#define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
	struct vfs_context context = { \
		.vc_thread = current_thread(), \
		.vc_ucred = fp->f_cred, \
	}
2621
2622 static user_addr_t
sys_fnctl_parse_arg(proc_t p,user_long_t arg)2623 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2624 {
2625 /*
2626 * Since the arg parameter is defined as a long but may be
2627 * either a long or a pointer we must take care to handle
2628 * sign extension issues. Our sys call munger will sign
2629 * extend a long when we are called from a 32-bit process.
2630 * Since we can never have an address greater than 32-bits
2631 * from a 32-bit process we lop off the top 32-bits to avoid
2632 * getting the wrong address
2633 */
2634 return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2635 }
2636
/*
 * Common cleanup for fcntl handlers that still hold the proc fdlock: drop
 * the fp reference taken by fp_lookup() (locked variant), release the
 * fdlock, and pass the error through.
 */
static int
sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
{
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
2645
/*
 * Common cleanup for fcntl handlers acting on vnodes, once they have
 * released the fdlock: audit the vnode path, drop the fp reference
 * (unlocked variant), and pass the error through.
 */
static int
sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
{
#pragma unused(vp)

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	fp_drop(p, fd, fp, 0);
	return error;
}
2656
2657 typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
2658 struct fileproc *fp, int32_t *retval);
2659
2660 typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
2661 user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2662
2663 /*
2664 * SPI (private) for opening a file starting from a dir fd
2665 *
2666 * Note: do not inline to keep stack usage under control.
2667 */
__attribute__((noinline))
static int
sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
    struct fileproc *fp, struct vnode *vp, int32_t *retval)
{
#pragma unused(cmd)

	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
	struct user_fopenfrom fopen;
	struct vnode_attr *va;
	struct nameidata *nd;
	int error, cmode;
	bool has_entitlement;

	/*
	 * The directory descriptor itself must be open for reading.
	 * Note: entered with the proc fdlock held; it is released below.
	 */
	if ((fp->f_flag & FREAD) == 0) {
		return sys_fcntl_out(p, fd, fp, EBADF);
	}
	proc_fdunlock(p);

	/* Take an iocount on the directory vnode before using it */
	if (vnode_getwithref(vp)) {
		error = ENOENT;
		goto outdrop;
	}

	/* Only valid for directories */
	if (vp->v_type != VDIR) {
		vnode_put(vp);
		error = ENOTDIR;
		goto outdrop;
	}

	/*
	 * Only entitled apps may use the credentials of the thread
	 * that opened the file descriptor.
	 * Non-entitled threads will use their own context.
	 */
	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);

	/* Get flags, mode and pathname arguments. */
	if (IS_64BIT_PROCESS(p)) {
		error = copyin(argp, &fopen, sizeof(fopen));
	} else {
		/* 32-bit layout differs; widen it into user_fopenfrom form */
		struct user32_fopenfrom fopen32;

		error = copyin(argp, &fopen32, sizeof(fopen32));
		fopen.o_flags = fopen32.o_flags;
		fopen.o_mode = fopen32.o_mode;
		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
	}
	if (error) {
		vnode_put(vp);
		goto outdrop;
	}

	/* open1() can have really deep stacks, so allocate those */
	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	AUDIT_ARG(fflags, fopen.o_flags);
	AUDIT_ARG(mode, fopen.o_mode);
	VATTR_INIT(va);
	/* Mask off all but regular access permissions */
	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);

	/* vfs_context built from the current thread and fp's credential */
	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);

	/* Start the lookup relative to the file descriptor's vnode. */
	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
	nd->ni_dvp = vp;

	error = open1(has_entitlement ? &context : vfs_context_current(),
	    nd, fopen.o_flags, va, NULL, NULL, retval);

	kfree_type(struct vnode_attr, va);
	kfree_type(struct nameidata, nd);

	vnode_put(vp);

outdrop:
	return sys_fcntl_outdrop(p, fd, fp, vp, error);
}
2752
2753 int
sys_fcntl_nocancel(proc_t p,struct fcntl_nocancel_args * uap,int32_t * retval)2754 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2755 {
2756 int fd = uap->fd;
2757 int cmd = uap->cmd;
2758 struct filedesc *fdp = &p->p_fd;
2759 struct fileproc *fp;
2760 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
2761 unsigned int oflags, nflags;
2762 int i, tmp, error, error2, flg = 0;
2763 struct flock fl = {};
2764 struct flocktimeout fltimeout;
2765 struct timespec *timeout = NULL;
2766 off_t offset;
2767 int newmin;
2768 daddr64_t lbn, bn;
2769 unsigned int fflag;
2770 user_addr_t argp;
2771 boolean_t is64bit;
2772 int has_entitlement = 0;
2773
2774 AUDIT_ARG(fd, uap->fd);
2775 AUDIT_ARG(cmd, uap->cmd);
2776
2777 proc_fdlock(p);
2778 if ((error = fp_lookup(p, fd, &fp, 1))) {
2779 proc_fdunlock(p);
2780 return error;
2781 }
2782
2783 SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2784
2785 is64bit = proc_is64bit(p);
2786 if (is64bit) {
2787 argp = uap->arg;
2788 } else {
2789 /*
2790 * Since the arg parameter is defined as a long but may be
2791 * either a long or a pointer we must take care to handle
2792 * sign extension issues. Our sys call munger will sign
2793 * extend a long when we are called from a 32-bit process.
2794 * Since we can never have an address greater than 32-bits
2795 * from a 32-bit process we lop off the top 32-bits to avoid
2796 * getting the wrong address
2797 */
2798 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2799 }
2800
2801 #if CONFIG_MACF
2802 error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2803 if (error) {
2804 goto out;
2805 }
2806 #endif
2807
2808 switch (cmd) {
2809 case F_DUPFD:
2810 case F_DUPFD_CLOEXEC:
2811 if (fp_isguarded(fp, GUARD_DUP)) {
2812 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2813 goto out;
2814 }
2815 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2816 AUDIT_ARG(value32, newmin);
2817 if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2818 error = EINVAL;
2819 goto out;
2820 }
2821 if ((error = fdalloc(p, newmin, &i))) {
2822 goto out;
2823 }
2824 error = finishdup(p, fdp, fd, i,
2825 cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2826 goto out;
2827
2828 case F_GETFD:
2829 *retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2830 error = 0;
2831 goto out;
2832
2833 case F_SETFD:
2834 AUDIT_ARG(value32, (uint32_t)uap->arg);
2835 if (uap->arg & FD_CLOEXEC) {
2836 fp->fp_flags |= FP_CLOEXEC;
2837 error = 0;
2838 } else if (!fp->fp_guard_attrs) {
2839 fp->fp_flags &= ~FP_CLOEXEC;
2840 error = 0;
2841 } else {
2842 error = fp_guard_exception(p,
2843 fd, fp, kGUARD_EXC_NOCLOEXEC);
2844 }
2845 goto out;
2846
2847 case F_GETFL:
2848 *retval = OFLAGS(fp->f_flag);
2849 error = 0;
2850 goto out;
2851
2852 case F_SETFL:
2853 // FIXME (rdar://54898652)
2854 //
2855 // this code is broken if fnctl(F_SETFL), ioctl() are
2856 // called concurrently for the same fileglob.
2857
2858 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2859 AUDIT_ARG(value32, tmp);
2860
2861 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2862 nflags = oflags & ~FCNTLFLAGS;
2863 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2864 });
2865 tmp = nflags & FNONBLOCK;
2866 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2867 if (error) {
2868 goto out;
2869 }
2870 tmp = nflags & FASYNC;
2871 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2872 if (!error) {
2873 goto out;
2874 }
2875 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2876 tmp = 0;
2877 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2878 goto out;
2879
2880 case F_GETOWN:
2881 if (fp->f_type == DTYPE_SOCKET) {
2882 *retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2883 error = 0;
2884 goto out;
2885 }
2886 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2887 *retval = -*retval;
2888 goto out;
2889
2890 case F_SETOWN:
2891 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2892 AUDIT_ARG(value32, tmp);
2893 if (fp->f_type == DTYPE_SOCKET) {
2894 ((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2895 error = 0;
2896 goto out;
2897 }
2898 if (fp->f_type == DTYPE_PIPE) {
2899 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2900 goto out;
2901 }
2902
2903 if (tmp <= 0) {
2904 tmp = -tmp;
2905 } else {
2906 proc_t p1 = proc_find(tmp);
2907 if (p1 == 0) {
2908 error = ESRCH;
2909 goto out;
2910 }
2911 tmp = (int)p1->p_pgrpid;
2912 proc_rele(p1);
2913 }
2914 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2915 goto out;
2916
2917 case F_SETNOSIGPIPE:
2918 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2919 if (fp->f_type == DTYPE_SOCKET) {
2920 #if SOCKETS
2921 error = sock_setsockopt((struct socket *)fp_get_data(fp),
2922 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2923 #else
2924 error = EINVAL;
2925 #endif
2926 } else {
2927 struct fileglob *fg = fp->fp_glob;
2928
2929 lck_mtx_lock_spin(&fg->fg_lock);
2930 if (tmp) {
2931 fg->fg_lflags |= FG_NOSIGPIPE;
2932 } else {
2933 fg->fg_lflags &= ~FG_NOSIGPIPE;
2934 }
2935 lck_mtx_unlock(&fg->fg_lock);
2936 error = 0;
2937 }
2938 goto out;
2939
2940 case F_GETNOSIGPIPE:
2941 if (fp->f_type == DTYPE_SOCKET) {
2942 #if SOCKETS
2943 int retsize = sizeof(*retval);
2944 error = sock_getsockopt((struct socket *)fp_get_data(fp),
2945 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2946 #else
2947 error = EINVAL;
2948 #endif
2949 } else {
2950 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2951 1 : 0;
2952 error = 0;
2953 }
2954 goto out;
2955
2956 case F_SETCONFINED:
2957 /*
2958 * If this is the only reference to this fglob in the process
2959 * and it's already marked as close-on-fork then mark it as
2960 * (immutably) "confined" i.e. any fd that points to it will
2961 * forever be close-on-fork, and attempts to use an IPC
2962 * mechanism to move the descriptor elsewhere will fail.
2963 */
2964 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2965 struct fileglob *fg = fp->fp_glob;
2966
2967 lck_mtx_lock_spin(&fg->fg_lock);
2968 if (fg->fg_lflags & FG_CONFINED) {
2969 error = 0;
2970 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2971 error = EAGAIN; /* go close the dup .. */
2972 } else if (fp->fp_flags & FP_CLOFORK) {
2973 fg->fg_lflags |= FG_CONFINED;
2974 error = 0;
2975 } else {
2976 error = EBADF; /* open without O_CLOFORK? */
2977 }
2978 lck_mtx_unlock(&fg->fg_lock);
2979 } else {
2980 /*
2981 * Other subsystems may have built on the immutability
2982 * of FG_CONFINED; clearing it may be tricky.
2983 */
2984 error = EPERM; /* immutable */
2985 }
2986 goto out;
2987
2988 case F_GETCONFINED:
2989 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2990 error = 0;
2991 goto out;
2992
2993 case F_SETLKWTIMEOUT:
2994 case F_SETLKW:
2995 case F_OFD_SETLKWTIMEOUT:
2996 case F_OFD_SETLKW:
2997 flg |= F_WAIT;
2998 OS_FALLTHROUGH;
2999
3000 case F_SETLK:
3001 case F_OFD_SETLK:
3002 if (fp->f_type != DTYPE_VNODE) {
3003 error = EBADF;
3004 goto out;
3005 }
3006 vp = (struct vnode *)fp_get_data(fp);
3007
3008 fflag = fp->f_flag;
3009 offset = fp->f_offset;
3010 proc_fdunlock(p);
3011
3012 /* Copy in the lock structure */
3013 if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3014 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3015 if (error) {
3016 goto outdrop;
3017 }
3018 fl = fltimeout.fl;
3019 timeout = &fltimeout.timeout;
3020 } else {
3021 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3022 if (error) {
3023 goto outdrop;
3024 }
3025 }
3026
3027 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3028 /* and ending byte for EOVERFLOW in SEEK_SET */
3029 error = check_file_seek_range(&fl, offset);
3030 if (error) {
3031 goto outdrop;
3032 }
3033
3034 if ((error = vnode_getwithref(vp))) {
3035 goto outdrop;
3036 }
3037 if (fl.l_whence == SEEK_CUR) {
3038 fl.l_start += offset;
3039 }
3040
3041 #if CONFIG_MACF
3042 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3043 F_SETLK, &fl);
3044 if (error) {
3045 (void)vnode_put(vp);
3046 goto outdrop;
3047 }
3048 #endif
3049 switch (cmd) {
3050 case F_OFD_SETLK:
3051 case F_OFD_SETLKW:
3052 case F_OFD_SETLKWTIMEOUT:
3053 flg |= F_OFD_LOCK;
3054 switch (fl.l_type) {
3055 case F_RDLCK:
3056 if ((fflag & FREAD) == 0) {
3057 error = EBADF;
3058 break;
3059 }
3060 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3061 F_SETLK, &fl, flg, &context, timeout);
3062 break;
3063 case F_WRLCK:
3064 if ((fflag & FWRITE) == 0) {
3065 error = EBADF;
3066 break;
3067 }
3068 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3069 F_SETLK, &fl, flg, &context, timeout);
3070 break;
3071 case F_UNLCK:
3072 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3073 F_UNLCK, &fl, F_OFD_LOCK, &context,
3074 timeout);
3075 break;
3076 default:
3077 error = EINVAL;
3078 break;
3079 }
3080 if (0 == error &&
3081 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3082 struct fileglob *fg = fp->fp_glob;
3083
3084 /*
3085 * arrange F_UNLCK on last close (once
3086 * set, FG_HAS_OFDLOCK is immutable)
3087 */
3088 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3089 lck_mtx_lock_spin(&fg->fg_lock);
3090 fg->fg_lflags |= FG_HAS_OFDLOCK;
3091 lck_mtx_unlock(&fg->fg_lock);
3092 }
3093 }
3094 break;
3095 default:
3096 flg |= F_POSIX;
3097 switch (fl.l_type) {
3098 case F_RDLCK:
3099 if ((fflag & FREAD) == 0) {
3100 error = EBADF;
3101 break;
3102 }
3103 // XXX UInt32 unsafe for LP64 kernel
3104 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3105 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3106 F_SETLK, &fl, flg, &context, timeout);
3107 break;
3108 case F_WRLCK:
3109 if ((fflag & FWRITE) == 0) {
3110 error = EBADF;
3111 break;
3112 }
3113 // XXX UInt32 unsafe for LP64 kernel
3114 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3115 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3116 F_SETLK, &fl, flg, &context, timeout);
3117 break;
3118 case F_UNLCK:
3119 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3120 F_UNLCK, &fl, F_POSIX, &context, timeout);
3121 break;
3122 default:
3123 error = EINVAL;
3124 break;
3125 }
3126 break;
3127 }
3128 (void) vnode_put(vp);
3129 goto outdrop;
3130
3131 case F_GETLK:
3132 case F_OFD_GETLK:
3133 case F_GETLKPID:
3134 case F_OFD_GETLKPID:
3135 if (fp->f_type != DTYPE_VNODE) {
3136 error = EBADF;
3137 goto out;
3138 }
3139 vp = (struct vnode *)fp_get_data(fp);
3140
3141 offset = fp->f_offset;
3142 proc_fdunlock(p);
3143
3144 /* Copy in the lock structure */
3145 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3146 if (error) {
3147 goto outdrop;
3148 }
3149
3150 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3151 /* and ending byte for EOVERFLOW in SEEK_SET */
3152 error = check_file_seek_range(&fl, offset);
3153 if (error) {
3154 goto outdrop;
3155 }
3156
3157 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3158 error = EINVAL;
3159 goto outdrop;
3160 }
3161
3162 switch (fl.l_type) {
3163 case F_RDLCK:
3164 case F_UNLCK:
3165 case F_WRLCK:
3166 break;
3167 default:
3168 error = EINVAL;
3169 goto outdrop;
3170 }
3171
3172 switch (fl.l_whence) {
3173 case SEEK_CUR:
3174 case SEEK_SET:
3175 case SEEK_END:
3176 break;
3177 default:
3178 error = EINVAL;
3179 goto outdrop;
3180 }
3181
3182 if ((error = vnode_getwithref(vp)) == 0) {
3183 if (fl.l_whence == SEEK_CUR) {
3184 fl.l_start += offset;
3185 }
3186
3187 #if CONFIG_MACF
3188 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3189 cmd, &fl);
3190 if (error == 0)
3191 #endif
3192 switch (cmd) {
3193 case F_OFD_GETLK:
3194 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3195 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3196 break;
3197 case F_OFD_GETLKPID:
3198 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3199 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3200 break;
3201 default:
3202 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3203 cmd, &fl, F_POSIX, &context, NULL);
3204 break;
3205 }
3206
3207 (void)vnode_put(vp);
3208
3209 if (error == 0) {
3210 error = copyout((caddr_t)&fl, argp, sizeof(fl));
3211 }
3212 }
3213 goto outdrop;
3214
3215 case F_PREALLOCATE: {
3216 fstore_t alloc_struct; /* structure for allocate command */
3217 u_int32_t alloc_flags = 0;
3218
3219 if (fp->f_type != DTYPE_VNODE) {
3220 error = EBADF;
3221 goto out;
3222 }
3223
3224 vp = (struct vnode *)fp_get_data(fp);
3225 proc_fdunlock(p);
3226
3227 /* make sure that we have write permission */
3228 if ((fp->f_flag & FWRITE) == 0) {
3229 error = EBADF;
3230 goto outdrop;
3231 }
3232
3233 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3234 if (error) {
3235 goto outdrop;
3236 }
3237
3238 /* now set the space allocated to 0 */
3239 alloc_struct.fst_bytesalloc = 0;
3240
3241 /*
3242 * Do some simple parameter checking
3243 */
3244
3245 /* set up the flags */
3246
3247 alloc_flags |= PREALLOCATE;
3248
3249 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3250 alloc_flags |= ALLOCATECONTIG;
3251 }
3252
3253 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3254 alloc_flags |= ALLOCATEALL;
3255 }
3256
3257 /*
3258 * Do any position mode specific stuff. The only
3259 * position mode supported now is PEOFPOSMODE
3260 */
3261
3262 switch (alloc_struct.fst_posmode) {
3263 case F_PEOFPOSMODE:
3264 if (alloc_struct.fst_offset != 0) {
3265 error = EINVAL;
3266 goto outdrop;
3267 }
3268
3269 alloc_flags |= ALLOCATEFROMPEOF;
3270 break;
3271
3272 case F_VOLPOSMODE:
3273 if (alloc_struct.fst_offset <= 0) {
3274 error = EINVAL;
3275 goto outdrop;
3276 }
3277
3278 alloc_flags |= ALLOCATEFROMVOL;
3279 break;
3280
3281 default: {
3282 error = EINVAL;
3283 goto outdrop;
3284 }
3285 }
3286 if ((error = vnode_getwithref(vp)) == 0) {
3287 /*
3288 * call allocate to get the space
3289 */
3290 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3291 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3292 &context);
3293 (void)vnode_put(vp);
3294
3295 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3296
3297 if (error == 0) {
3298 error = error2;
3299 }
3300 }
3301 goto outdrop;
3302 }
3303 case F_PUNCHHOLE: {
3304 fpunchhole_t args;
3305
3306 if (fp->f_type != DTYPE_VNODE) {
3307 error = EBADF;
3308 goto out;
3309 }
3310
3311 vp = (struct vnode *)fp_get_data(fp);
3312 proc_fdunlock(p);
3313
3314 /* need write permissions */
3315 if ((fp->f_flag & FWRITE) == 0) {
3316 error = EPERM;
3317 goto outdrop;
3318 }
3319
3320 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3321 goto outdrop;
3322 }
3323
3324 if ((error = vnode_getwithref(vp))) {
3325 goto outdrop;
3326 }
3327
3328 #if CONFIG_MACF
3329 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
3330 (void)vnode_put(vp);
3331 goto outdrop;
3332 }
3333 #endif
3334
3335 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
3336 (void)vnode_put(vp);
3337
3338 goto outdrop;
3339 }
3340 case F_TRIM_ACTIVE_FILE: {
3341 ftrimactivefile_t args;
3342
3343 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
3344 error = EACCES;
3345 goto out;
3346 }
3347
3348 if (fp->f_type != DTYPE_VNODE) {
3349 error = EBADF;
3350 goto out;
3351 }
3352
3353 vp = (struct vnode *)fp_get_data(fp);
3354 proc_fdunlock(p);
3355
3356 /* need write permissions */
3357 if ((fp->f_flag & FWRITE) == 0) {
3358 error = EPERM;
3359 goto outdrop;
3360 }
3361
3362 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3363 goto outdrop;
3364 }
3365
3366 if ((error = vnode_getwithref(vp))) {
3367 goto outdrop;
3368 }
3369
3370 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
3371 (void)vnode_put(vp);
3372
3373 goto outdrop;
3374 }
3375 case F_SPECULATIVE_READ: {
3376 fspecread_t args;
3377 off_t temp_length = 0;
3378
3379 if (fp->f_type != DTYPE_VNODE) {
3380 error = EBADF;
3381 goto out;
3382 }
3383
3384 vp = (struct vnode *)fp_get_data(fp);
3385 proc_fdunlock(p);
3386
3387 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3388 goto outdrop;
3389 }
3390
3391 /* Discard invalid offsets or lengths */
3392 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3393 error = EINVAL;
3394 goto outdrop;
3395 }
3396
3397 /*
3398 * Round the file offset down to a page-size boundary (or to 0).
3399 * The filesystem will need to round the length up to the end of the page boundary
3400 * or to the EOF of the file.
3401 */
3402 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3403 uint64_t foff_delta = args.fsr_offset - foff;
3404 args.fsr_offset = (off_t) foff;
3405
3406 /*
3407 * Now add in the delta to the supplied length. Since we may have adjusted the
3408 * offset, increase it by the amount that we adjusted.
3409 */
3410 if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3411 error = EOVERFLOW;
3412 goto outdrop;
3413 }
3414
3415 /*
3416 * Make sure (fsr_offset + fsr_length) does not overflow.
3417 */
3418 if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3419 error = EOVERFLOW;
3420 goto outdrop;
3421 }
3422
3423 if ((error = vnode_getwithref(vp))) {
3424 goto outdrop;
3425 }
3426 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
3427 (void)vnode_put(vp);
3428
3429 goto outdrop;
3430 }
3431 case F_SETSIZE:
3432 if (fp->f_type != DTYPE_VNODE) {
3433 error = EBADF;
3434 goto out;
3435 }
3436 vp = (struct vnode *)fp_get_data(fp);
3437 proc_fdunlock(p);
3438
3439 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
3440 if (error) {
3441 goto outdrop;
3442 }
3443 AUDIT_ARG(value64, offset);
3444
3445 error = vnode_getwithref(vp);
3446 if (error) {
3447 goto outdrop;
3448 }
3449
3450 #if CONFIG_MACF
3451 error = mac_vnode_check_truncate(&context,
3452 fp->fp_glob->fg_cred, vp);
3453 if (error) {
3454 (void)vnode_put(vp);
3455 goto outdrop;
3456 }
3457 #endif
3458 /*
3459 * Make sure that we are root. Growing a file
3460 * without zero filling the data is a security hole.
3461 */
3462 if (!kauth_cred_issuser(kauth_cred_get())) {
3463 error = EACCES;
3464 } else {
3465 /*
3466 * Require privilege to change file size without zerofill,
3467 * else will change the file size and zerofill it.
3468 */
3469 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
3470 if (error == 0) {
3471 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
3472 } else {
3473 error = vnode_setsize(vp, offset, 0, &context);
3474 }
3475
3476 #if CONFIG_MACF
3477 if (error == 0) {
3478 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
3479 }
3480 #endif
3481 }
3482
3483 (void)vnode_put(vp);
3484 goto outdrop;
3485
3486 case F_RDAHEAD:
3487 if (fp->f_type != DTYPE_VNODE) {
3488 error = EBADF;
3489 goto out;
3490 }
3491 if (uap->arg) {
3492 os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3493 } else {
3494 os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3495 }
3496 goto out;
3497
3498 case F_NOCACHE:
3499 if (fp->f_type != DTYPE_VNODE) {
3500 error = EBADF;
3501 goto out;
3502 }
3503 if (uap->arg) {
3504 os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3505 } else {
3506 os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3507 }
3508 goto out;
3509
3510 case F_NODIRECT:
3511 if (fp->f_type != DTYPE_VNODE) {
3512 error = EBADF;
3513 goto out;
3514 }
3515 if (uap->arg) {
3516 os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3517 } else {
3518 os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3519 }
3520 goto out;
3521
3522 case F_SINGLE_WRITER:
3523 if (fp->f_type != DTYPE_VNODE) {
3524 error = EBADF;
3525 goto out;
3526 }
3527 if (uap->arg) {
3528 os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3529 } else {
3530 os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3531 }
3532 goto out;
3533
3534 case F_GLOBAL_NOCACHE:
3535 if (fp->f_type != DTYPE_VNODE) {
3536 error = EBADF;
3537 goto out;
3538 }
3539 vp = (struct vnode *)fp_get_data(fp);
3540 proc_fdunlock(p);
3541
3542 if ((error = vnode_getwithref(vp)) == 0) {
3543 *retval = vnode_isnocache(vp);
3544
3545 if (uap->arg) {
3546 vnode_setnocache(vp);
3547 } else {
3548 vnode_clearnocache(vp);
3549 }
3550
3551 (void)vnode_put(vp);
3552 }
3553 goto outdrop;
3554
3555 case F_CHECK_OPENEVT:
3556 if (fp->f_type != DTYPE_VNODE) {
3557 error = EBADF;
3558 goto out;
3559 }
3560 vp = (struct vnode *)fp_get_data(fp);
3561 proc_fdunlock(p);
3562
3563 if ((error = vnode_getwithref(vp)) == 0) {
3564 *retval = vnode_is_openevt(vp);
3565
3566 if (uap->arg) {
3567 vnode_set_openevt(vp);
3568 } else {
3569 vnode_clear_openevt(vp);
3570 }
3571
3572 (void)vnode_put(vp);
3573 }
3574 goto outdrop;
3575
3576 case F_RDADVISE: {
3577 struct radvisory ra_struct;
3578
3579 if (fp->f_type != DTYPE_VNODE) {
3580 error = EBADF;
3581 goto out;
3582 }
3583 vp = (struct vnode *)fp_get_data(fp);
3584 proc_fdunlock(p);
3585
3586 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
3587 goto outdrop;
3588 }
3589 if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
3590 error = EINVAL;
3591 goto outdrop;
3592 }
3593 if ((error = vnode_getwithref(vp)) == 0) {
3594 error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
3595
3596 (void)vnode_put(vp);
3597 }
3598 goto outdrop;
3599 }
3600
3601 case F_FLUSH_DATA:
3602
3603 if (fp->f_type != DTYPE_VNODE) {
3604 error = EBADF;
3605 goto out;
3606 }
3607 vp = (struct vnode *)fp_get_data(fp);
3608 proc_fdunlock(p);
3609
3610 if ((error = vnode_getwithref(vp)) == 0) {
3611 error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
3612
3613 (void)vnode_put(vp);
3614 }
3615 goto outdrop;
3616
3617 case F_LOG2PHYS:
3618 case F_LOG2PHYS_EXT: {
3619 struct log2phys l2p_struct = {}; /* structure for allocate command */
3620 int devBlockSize;
3621
3622 off_t file_offset = 0;
3623 size_t a_size = 0;
3624 size_t run = 0;
3625
3626 if (cmd == F_LOG2PHYS_EXT) {
3627 error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
3628 if (error) {
3629 goto out;
3630 }
3631 file_offset = l2p_struct.l2p_devoffset;
3632 } else {
3633 file_offset = fp->f_offset;
3634 }
3635 if (fp->f_type != DTYPE_VNODE) {
3636 error = EBADF;
3637 goto out;
3638 }
3639 vp = (struct vnode *)fp_get_data(fp);
3640 proc_fdunlock(p);
3641 if ((error = vnode_getwithref(vp))) {
3642 goto outdrop;
3643 }
3644 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
3645 if (error) {
3646 (void)vnode_put(vp);
3647 goto outdrop;
3648 }
3649 error = VNOP_BLKTOOFF(vp, lbn, &offset);
3650 if (error) {
3651 (void)vnode_put(vp);
3652 goto outdrop;
3653 }
3654 devBlockSize = vfs_devblocksize(vnode_mount(vp));
3655 if (cmd == F_LOG2PHYS_EXT) {
3656 if (l2p_struct.l2p_contigbytes < 0) {
3657 vnode_put(vp);
3658 error = EINVAL;
3659 goto outdrop;
3660 }
3661
3662 a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
3663 } else {
3664 a_size = devBlockSize;
3665 }
3666
3667 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
3668
3669 (void)vnode_put(vp);
3670
3671 if (!error) {
3672 l2p_struct.l2p_flags = 0; /* for now */
3673 if (cmd == F_LOG2PHYS_EXT) {
3674 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
3675 } else {
3676 l2p_struct.l2p_contigbytes = 0; /* for now */
3677 }
3678
3679 /*
3680 * The block number being -1 suggests that the file offset is not backed
3681 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
3682 */
3683 if (bn == -1) {
3684 /* Don't multiply it by the block size */
3685 l2p_struct.l2p_devoffset = bn;
3686 } else {
3687 l2p_struct.l2p_devoffset = bn * devBlockSize;
3688 l2p_struct.l2p_devoffset += file_offset - offset;
3689 }
3690 error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
3691 }
3692 goto outdrop;
3693 }
3694 case F_GETPATH:
3695 case F_GETPATH_NOFIRMLINK: {
3696 char *pathbufp;
3697 int pathlen;
3698
3699 if (fp->f_type != DTYPE_VNODE) {
3700 error = EBADF;
3701 goto out;
3702 }
3703 vp = (struct vnode *)fp_get_data(fp);
3704 proc_fdunlock(p);
3705
3706 pathlen = MAXPATHLEN;
3707 pathbufp = zalloc(ZV_NAMEI);
3708
3709 if ((error = vnode_getwithref(vp)) == 0) {
3710 if (cmd == F_GETPATH_NOFIRMLINK) {
3711 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
3712 } else {
3713 error = vn_getpath(vp, pathbufp, &pathlen);
3714 }
3715 (void)vnode_put(vp);
3716
3717 if (error == 0) {
3718 error = copyout((caddr_t)pathbufp, argp, pathlen);
3719 }
3720 }
3721 zfree(ZV_NAMEI, pathbufp);
3722 goto outdrop;
3723 }
3724
3725 case F_PATHPKG_CHECK: {
3726 char *pathbufp;
3727 size_t pathlen;
3728
3729 if (fp->f_type != DTYPE_VNODE) {
3730 error = EBADF;
3731 goto out;
3732 }
3733 vp = (struct vnode *)fp_get_data(fp);
3734 proc_fdunlock(p);
3735
3736 pathlen = MAXPATHLEN;
3737 pathbufp = zalloc(ZV_NAMEI);
3738
3739 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
3740 if ((error = vnode_getwithref(vp)) == 0) {
3741 AUDIT_ARG(text, pathbufp);
3742 error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
3743
3744 (void)vnode_put(vp);
3745 }
3746 }
3747 zfree(ZV_NAMEI, pathbufp);
3748 goto outdrop;
3749 }
3750
3751 case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
3752 case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
3753 case F_BARRIERFSYNC: // fsync + barrier
3754 case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
3755 case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
3756 if (fp->f_type != DTYPE_VNODE) {
3757 error = EBADF;
3758 goto out;
3759 }
3760 vp = (struct vnode *)fp_get_data(fp);
3761 proc_fdunlock(p);
3762
3763 if ((error = vnode_getwithref(vp)) == 0) {
3764 error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
3765
3766 (void)vnode_put(vp);
3767 }
3768 break;
3769 }
3770
3771 /*
3772 * SPI (private) for opening a file starting from a dir fd
3773 */
3774 case F_OPENFROM: {
3775 /* Check if this isn't a valid file descriptor */
3776 if (fp->f_type != DTYPE_VNODE) {
3777 error = EBADF;
3778 goto out;
3779 }
3780 vp = (struct vnode *)fp_get_data(fp);
3781
3782 return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
3783 }
3784
3785 /*
3786 * SPI (private) for unlinking a file starting from a dir fd
3787 */
3788 case F_UNLINKFROM: {
3789 user_addr_t pathname;
3790
3791 /* Check if this isn't a valid file descriptor */
3792 if ((fp->f_type != DTYPE_VNODE) ||
3793 (fp->f_flag & FREAD) == 0) {
3794 error = EBADF;
3795 goto out;
3796 }
3797 vp = (struct vnode *)fp_get_data(fp);
3798 proc_fdunlock(p);
3799
3800 if (vnode_getwithref(vp)) {
3801 error = ENOENT;
3802 goto outdrop;
3803 }
3804
3805 /* Only valid for directories */
3806 if (vp->v_type != VDIR) {
3807 vnode_put(vp);
3808 error = ENOTDIR;
3809 goto outdrop;
3810 }
3811
3812 /*
3813 * Only entitled apps may use the credentials of the thread
3814 * that opened the file descriptor.
3815 * Non-entitled threads will use their own context.
3816 */
3817 if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
3818 has_entitlement = 1;
3819 }
3820
3821 /* Get flags, mode and pathname arguments. */
3822 if (IS_64BIT_PROCESS(p)) {
3823 pathname = (user_addr_t)argp;
3824 } else {
3825 pathname = CAST_USER_ADDR_T(argp);
3826 }
3827
3828 /* Start the lookup relative to the file descriptor's vnode. */
3829 error = unlink1(has_entitlement ? &context : vfs_context_current(),
3830 vp, pathname, UIO_USERSPACE, 0);
3831
3832 vnode_put(vp);
3833 break;
3834 }
3835
3836 case F_ADDSIGS:
3837 case F_ADDFILESIGS:
3838 case F_ADDFILESIGS_FOR_DYLD_SIM:
3839 case F_ADDFILESIGS_RETURN:
3840 case F_ADDFILESIGS_INFO:
3841 {
3842 struct cs_blob *blob = NULL;
3843 struct user_fsignatures fs;
3844 kern_return_t kr;
3845 vm_offset_t kernel_blob_addr;
3846 vm_size_t kernel_blob_size;
3847 int blob_add_flags = 0;
3848 const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
3849 offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
3850 offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
3851
3852 if (fp->f_type != DTYPE_VNODE) {
3853 error = EBADF;
3854 goto out;
3855 }
3856 vp = (struct vnode *)fp_get_data(fp);
3857 proc_fdunlock(p);
3858
3859 if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3860 blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
3861 if ((proc_getcsflags(p) & CS_KILL) == 0) {
3862 proc_lock(p);
3863 proc_csflags_set(p, CS_KILL);
3864 proc_unlock(p);
3865 }
3866 }
3867
3868 error = vnode_getwithref(vp);
3869 if (error) {
3870 goto outdrop;
3871 }
3872
3873 if (IS_64BIT_PROCESS(p)) {
3874 error = copyin(argp, &fs, sizeof_fs);
3875 } else {
3876 if (cmd == F_ADDFILESIGS_INFO) {
3877 error = EINVAL;
3878 vnode_put(vp);
3879 goto outdrop;
3880 }
3881
3882 struct user32_fsignatures fs32;
3883
3884 error = copyin(argp, &fs32, sizeof(fs32));
3885 fs.fs_file_start = fs32.fs_file_start;
3886 fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
3887 fs.fs_blob_size = fs32.fs_blob_size;
3888 }
3889
3890 if (error) {
3891 vnode_put(vp);
3892 goto outdrop;
3893 }
3894
3895 /*
3896 * First check if we have something loaded a this offset
3897 */
3898 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
3899 if (blob != NULL) {
3900 /* If this is for dyld_sim revalidate the blob */
3901 if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3902 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
3903 if (error) {
3904 blob = NULL;
3905 if (error != EAGAIN) {
3906 vnode_put(vp);
3907 goto outdrop;
3908 }
3909 }
3910 }
3911 }
3912
3913 if (blob == NULL) {
3914 /*
3915 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
3916 * our use cases for the immediate future, but note that at the time of this commit, some
3917 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
3918 *
3919 * We should consider how we can manage this more effectively; the above means that some
3920 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
3921 * threshold considered ridiculous at the time of this change.
3922 */
3923 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
3924 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
3925 error = E2BIG;
3926 vnode_put(vp);
3927 goto outdrop;
3928 }
3929
3930 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
3931 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
3932 if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
3933 error = ENOMEM;
3934 vnode_put(vp);
3935 goto outdrop;
3936 }
3937
3938 if (cmd == F_ADDSIGS) {
3939 error = copyin(fs.fs_blob_start,
3940 (void *) kernel_blob_addr,
3941 fs.fs_blob_size);
3942 } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
3943 int resid;
3944
3945 error = vn_rdwr(UIO_READ,
3946 vp,
3947 (caddr_t) kernel_blob_addr,
3948 (int)kernel_blob_size,
3949 fs.fs_file_start + fs.fs_blob_start,
3950 UIO_SYSSPACE,
3951 0,
3952 kauth_cred_get(),
3953 &resid,
3954 p);
3955 if ((error == 0) && resid) {
3956 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
3957 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
3958 }
3959 }
3960
3961 if (error) {
3962 ubc_cs_blob_deallocate(kernel_blob_addr,
3963 kernel_blob_size);
3964 vnode_put(vp);
3965 goto outdrop;
3966 }
3967
3968 blob = NULL;
3969 error = ubc_cs_blob_add(vp,
3970 proc_platform(p),
3971 CPU_TYPE_ANY, /* not for a specific architecture */
3972 CPU_SUBTYPE_ANY,
3973 fs.fs_file_start,
3974 &kernel_blob_addr,
3975 kernel_blob_size,
3976 NULL,
3977 blob_add_flags,
3978 &blob);
3979
3980 /* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
3981 if (error) {
3982 if (kernel_blob_addr) {
3983 ubc_cs_blob_deallocate(kernel_blob_addr,
3984 kernel_blob_size);
3985 }
3986 vnode_put(vp);
3987 goto outdrop;
3988 } else {
3989 #if CHECK_CS_VALIDATION_BITMAP
3990 ubc_cs_validation_bitmap_allocate( vp );
3991 #endif
3992 }
3993 }
3994
3995 if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
3996 cmd == F_ADDFILESIGS_INFO) {
3997 /*
3998 * The first element of the structure is a
3999 * off_t that happen to have the same size for
4000 * all archs. Lets overwrite that.
4001 */
4002 off_t end_offset = 0;
4003 if (blob) {
4004 end_offset = blob->csb_end_offset;
4005 }
4006 error = copyout(&end_offset, argp, sizeof(end_offset));
4007
4008 if (error) {
4009 vnode_put(vp);
4010 goto outdrop;
4011 }
4012 }
4013
4014 if (cmd == F_ADDFILESIGS_INFO) {
4015 /* Return information. What we copy out depends on the size of the
4016 * passed in structure, to keep binary compatibility. */
4017
4018 if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
4019 // enough room for fs_cdhash[20]+fs_hash_type
4020
4021 if (blob != NULL) {
4022 error = copyout(blob->csb_cdhash,
4023 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
4024 USER_FSIGNATURES_CDHASH_LEN);
4025 if (error) {
4026 vnode_put(vp);
4027 goto outdrop;
4028 }
4029 int hashtype = cs_hash_type(blob->csb_hashtype);
4030 error = copyout(&hashtype,
4031 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
4032 sizeof(int));
4033 if (error) {
4034 vnode_put(vp);
4035 goto outdrop;
4036 }
4037 }
4038 }
4039 }
4040
4041 (void) vnode_put(vp);
4042 break;
4043 }
4044 #if CONFIG_SUPPLEMENTAL_SIGNATURES
4045 case F_ADDFILESUPPL:
4046 {
4047 struct vnode *ivp;
4048 struct cs_blob *blob = NULL;
4049 struct user_fsupplement fs;
4050 int orig_fd;
4051 struct fileproc* orig_fp = NULL;
4052 kern_return_t kr;
4053 vm_offset_t kernel_blob_addr;
4054 vm_size_t kernel_blob_size;
4055
4056 if (!IS_64BIT_PROCESS(p)) {
4057 error = EINVAL;
4058 goto out; // drop fp and unlock fds
4059 }
4060
4061 if (fp->f_type != DTYPE_VNODE) {
4062 error = EBADF;
4063 goto out;
4064 }
4065
4066 error = copyin(argp, &fs, sizeof(fs));
4067 if (error) {
4068 goto out;
4069 }
4070
4071 orig_fd = fs.fs_orig_fd;
4072 if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
4073 printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
4074 goto out;
4075 }
4076
4077 if (orig_fp->f_type != DTYPE_VNODE) {
4078 error = EBADF;
4079 fp_drop(p, orig_fd, orig_fp, 1);
4080 goto out;
4081 }
4082
4083 ivp = (struct vnode *)fp_get_data(orig_fp);
4084
4085 vp = (struct vnode *)fp_get_data(fp);
4086
4087 proc_fdunlock(p);
4088
4089 error = vnode_getwithref(ivp);
4090 if (error) {
4091 fp_drop(p, orig_fd, orig_fp, 0);
4092 goto outdrop; //drop fp
4093 }
4094
4095 error = vnode_getwithref(vp);
4096 if (error) {
4097 vnode_put(ivp);
4098 fp_drop(p, orig_fd, orig_fp, 0);
4099 goto outdrop;
4100 }
4101
4102 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
4103 error = E2BIG;
4104 goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
4105 }
4106
4107 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
4108 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
4109 if (kr != KERN_SUCCESS) {
4110 error = ENOMEM;
4111 goto dropboth;
4112 }
4113
4114 int resid;
4115 error = vn_rdwr(UIO_READ, vp,
4116 (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
4117 fs.fs_file_start + fs.fs_blob_start,
4118 UIO_SYSSPACE, 0,
4119 kauth_cred_get(), &resid, p);
4120 if ((error == 0) && resid) {
4121 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
4122 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
4123 }
4124
4125 if (error) {
4126 ubc_cs_blob_deallocate(kernel_blob_addr,
4127 kernel_blob_size);
4128 goto dropboth;
4129 }
4130
4131 error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
4132 &kernel_blob_addr, kernel_blob_size, &blob);
4133
4134 /* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
4135 if (error) {
4136 if (kernel_blob_addr) {
4137 ubc_cs_blob_deallocate(kernel_blob_addr,
4138 kernel_blob_size);
4139 }
4140 goto dropboth;
4141 }
4142 vnode_put(ivp);
4143 vnode_put(vp);
4144 fp_drop(p, orig_fd, orig_fp, 0);
4145 break;
4146
4147 dropboth:
4148 vnode_put(ivp);
4149 vnode_put(vp);
4150 fp_drop(p, orig_fd, orig_fp, 0);
4151 goto outdrop;
4152 }
4153 #endif
4154 case F_GETCODEDIR:
4155 case F_FINDSIGS: {
4156 error = ENOTSUP;
4157 goto out;
4158 }
4159 case F_CHECK_LV: {
4160 struct fileglob *fg;
4161 fchecklv_t lv = {};
4162
4163 if (fp->f_type != DTYPE_VNODE) {
4164 error = EBADF;
4165 goto out;
4166 }
4167 fg = fp->fp_glob;
4168 proc_fdunlock(p);
4169
4170 if (IS_64BIT_PROCESS(p)) {
4171 error = copyin(argp, &lv, sizeof(lv));
4172 } else {
4173 struct user32_fchecklv lv32 = {};
4174
4175 error = copyin(argp, &lv32, sizeof(lv32));
4176 lv.lv_file_start = lv32.lv_file_start;
4177 lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
4178 lv.lv_error_message_size = lv32.lv_error_message_size;
4179 }
4180 if (error) {
4181 goto outdrop;
4182 }
4183
4184 #if CONFIG_MACF
4185 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
4186 (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
4187 #endif
4188
4189 break;
4190 }
4191 case F_GETSIGSINFO: {
4192 struct cs_blob *blob = NULL;
4193 fgetsigsinfo_t sigsinfo = {};
4194
4195 if (fp->f_type != DTYPE_VNODE) {
4196 error = EBADF;
4197 goto out;
4198 }
4199 vp = (struct vnode *)fp_get_data(fp);
4200 proc_fdunlock(p);
4201
4202 error = vnode_getwithref(vp);
4203 if (error) {
4204 goto outdrop;
4205 }
4206
4207 error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
4208 if (error) {
4209 vnode_put(vp);
4210 goto outdrop;
4211 }
4212
4213 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
4214 if (blob == NULL) {
4215 error = ENOENT;
4216 vnode_put(vp);
4217 goto outdrop;
4218 }
4219 switch (sigsinfo.fg_info_request) {
4220 case GETSIGSINFO_PLATFORM_BINARY:
4221 sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
4222 error = copyout(&sigsinfo.fg_sig_is_platform,
4223 (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
4224 sizeof(sigsinfo.fg_sig_is_platform));
4225 if (error) {
4226 vnode_put(vp);
4227 goto outdrop;
4228 }
4229 break;
4230 default:
4231 error = EINVAL;
4232 vnode_put(vp);
4233 goto outdrop;
4234 }
4235 vnode_put(vp);
4236 break;
4237 }
4238 #if CONFIG_PROTECT
4239 case F_GETPROTECTIONCLASS: {
4240 if (fp->f_type != DTYPE_VNODE) {
4241 error = EBADF;
4242 goto out;
4243 }
4244 vp = (struct vnode *)fp_get_data(fp);
4245
4246 proc_fdunlock(p);
4247
4248 if (vnode_getwithref(vp)) {
4249 error = ENOENT;
4250 goto outdrop;
4251 }
4252
4253 struct vnode_attr va;
4254
4255 VATTR_INIT(&va);
4256 VATTR_WANTED(&va, va_dataprotect_class);
4257 error = VNOP_GETATTR(vp, &va, &context);
4258 if (!error) {
4259 if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
4260 *retval = va.va_dataprotect_class;
4261 } else {
4262 error = ENOTSUP;
4263 }
4264 }
4265
4266 vnode_put(vp);
4267 break;
4268 }
4269
4270 case F_SETPROTECTIONCLASS: {
4271 /* tmp must be a valid PROTECTION_CLASS_* */
4272 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4273
4274 if (fp->f_type != DTYPE_VNODE) {
4275 error = EBADF;
4276 goto out;
4277 }
4278 vp = (struct vnode *)fp_get_data(fp);
4279
4280 proc_fdunlock(p);
4281
4282 if (vnode_getwithref(vp)) {
4283 error = ENOENT;
4284 goto outdrop;
4285 }
4286
4287 /* Only go forward if you have write access */
4288 vfs_context_t ctx = vfs_context_current();
4289 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4290 vnode_put(vp);
4291 error = EBADF;
4292 goto outdrop;
4293 }
4294
4295 struct vnode_attr va;
4296
4297 VATTR_INIT(&va);
4298 VATTR_SET(&va, va_dataprotect_class, tmp);
4299
4300 error = VNOP_SETATTR(vp, &va, ctx);
4301
4302 vnode_put(vp);
4303 break;
4304 }
4305
4306 case F_TRANSCODEKEY: {
4307 if (fp->f_type != DTYPE_VNODE) {
4308 error = EBADF;
4309 goto out;
4310 }
4311
4312 vp = (struct vnode *)fp_get_data(fp);
4313 proc_fdunlock(p);
4314
4315 if (vnode_getwithref(vp)) {
4316 error = ENOENT;
4317 goto outdrop;
4318 }
4319
4320 cp_key_t k = {
4321 .len = CP_MAX_WRAPPEDKEYSIZE,
4322 };
4323
4324 k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
4325 if (k.key == NULL) {
4326 error = ENOMEM;
4327 } else {
4328 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
4329 }
4330
4331 vnode_put(vp);
4332
4333 if (error == 0) {
4334 error = copyout(k.key, argp, k.len);
4335 *retval = k.len;
4336 }
4337 kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);
4338
4339 break;
4340 }
4341
4342 case F_GETPROTECTIONLEVEL: {
4343 if (fp->f_type != DTYPE_VNODE) {
4344 error = EBADF;
4345 goto out;
4346 }
4347
4348 vp = (struct vnode*)fp_get_data(fp);
4349 proc_fdunlock(p);
4350
4351 if (vnode_getwithref(vp)) {
4352 error = ENOENT;
4353 goto outdrop;
4354 }
4355
4356 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
4357
4358 vnode_put(vp);
4359 break;
4360 }
4361
4362 case F_GETDEFAULTPROTLEVEL: {
4363 if (fp->f_type != DTYPE_VNODE) {
4364 error = EBADF;
4365 goto out;
4366 }
4367
4368 vp = (struct vnode*)fp_get_data(fp);
4369 proc_fdunlock(p);
4370
4371 if (vnode_getwithref(vp)) {
4372 error = ENOENT;
4373 goto outdrop;
4374 }
4375
4376 /*
4377 * if cp_get_major_vers fails, error will be set to proper errno
4378 * and cp_version will still be 0.
4379 */
4380
4381 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
4382
4383 vnode_put(vp);
4384 break;
4385 }
4386
4387 #endif /* CONFIG_PROTECT */
4388
4389 case F_MOVEDATAEXTENTS: {
4390 struct fileproc *fp2 = NULL;
4391 struct vnode *src_vp = NULLVP;
4392 struct vnode *dst_vp = NULLVP;
4393 /* We need to grab the 2nd FD out of the argments before moving on. */
4394 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
4395
4396 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
4397 if (error) {
4398 goto out;
4399 }
4400
4401 if (fp->f_type != DTYPE_VNODE) {
4402 error = EBADF;
4403 goto out;
4404 }
4405
4406 /*
4407 * For now, special case HFS+ and APFS only, since this
4408 * is SPI.
4409 */
4410 src_vp = (struct vnode *)fp_get_data(fp);
4411 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
4412 error = ENOTSUP;
4413 goto out;
4414 }
4415
4416 /*
4417 * Get the references before we start acquiring iocounts on the vnodes,
4418 * while we still hold the proc fd lock
4419 */
4420 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4421 error = EBADF;
4422 goto out;
4423 }
4424 if (fp2->f_type != DTYPE_VNODE) {
4425 fp_drop(p, fd2, fp2, 1);
4426 error = EBADF;
4427 goto out;
4428 }
4429 dst_vp = (struct vnode *)fp_get_data(fp2);
4430 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
4431 fp_drop(p, fd2, fp2, 1);
4432 error = ENOTSUP;
4433 goto out;
4434 }
4435
4436 #if CONFIG_MACF
4437 /* Re-do MAC checks against the new FD, pass in a fake argument */
4438 error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4439 if (error) {
4440 fp_drop(p, fd2, fp2, 1);
4441 goto out;
4442 }
4443 #endif
4444 /* Audit the 2nd FD */
4445 AUDIT_ARG(fd, fd2);
4446
4447 proc_fdunlock(p);
4448
4449 if (vnode_getwithref(src_vp)) {
4450 fp_drop(p, fd2, fp2, 0);
4451 error = ENOENT;
4452 goto outdrop;
4453 }
4454 if (vnode_getwithref(dst_vp)) {
4455 vnode_put(src_vp);
4456 fp_drop(p, fd2, fp2, 0);
4457 error = ENOENT;
4458 goto outdrop;
4459 }
4460
4461 /*
4462 * Basic asserts; validate they are not the same and that
4463 * both live on the same filesystem.
4464 */
4465 if (dst_vp == src_vp) {
4466 vnode_put(src_vp);
4467 vnode_put(dst_vp);
4468 fp_drop(p, fd2, fp2, 0);
4469 error = EINVAL;
4470 goto outdrop;
4471 }
4472
4473 if (dst_vp->v_mount != src_vp->v_mount) {
4474 vnode_put(src_vp);
4475 vnode_put(dst_vp);
4476 fp_drop(p, fd2, fp2, 0);
4477 error = EXDEV;
4478 goto outdrop;
4479 }
4480
4481 /* Now we have a legit pair of FDs. Go to work */
4482
4483 /* Now check for write access to the target files */
4484 if (vnode_authorize(src_vp, NULLVP,
4485 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4486 vnode_put(src_vp);
4487 vnode_put(dst_vp);
4488 fp_drop(p, fd2, fp2, 0);
4489 error = EBADF;
4490 goto outdrop;
4491 }
4492
4493 if (vnode_authorize(dst_vp, NULLVP,
4494 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4495 vnode_put(src_vp);
4496 vnode_put(dst_vp);
4497 fp_drop(p, fd2, fp2, 0);
4498 error = EBADF;
4499 goto outdrop;
4500 }
4501
4502 /* Verify that both vps point to files and not directories */
4503 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4504 error = EINVAL;
4505 vnode_put(src_vp);
4506 vnode_put(dst_vp);
4507 fp_drop(p, fd2, fp2, 0);
4508 goto outdrop;
4509 }
4510
4511 /*
4512 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
4513 * We'll pass in our special bit indicating that the new behavior is expected
4514 */
4515
4516 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
4517
4518 vnode_put(src_vp);
4519 vnode_put(dst_vp);
4520 fp_drop(p, fd2, fp2, 0);
4521 break;
4522 }
4523
4524 /*
4525 * SPI for making a file compressed.
4526 */
4527 case F_MAKECOMPRESSED: {
4528 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4529
4530 if (fp->f_type != DTYPE_VNODE) {
4531 error = EBADF;
4532 goto out;
4533 }
4534
4535 vp = (struct vnode*)fp_get_data(fp);
4536 proc_fdunlock(p);
4537
4538 /* get the vnode */
4539 if (vnode_getwithref(vp)) {
4540 error = ENOENT;
4541 goto outdrop;
4542 }
4543
4544 /* Is it a file? */
4545 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
4546 vnode_put(vp);
4547 error = EBADF;
4548 goto outdrop;
4549 }
4550
4551 /* invoke ioctl to pass off to FS */
4552 /* Only go forward if you have write access */
4553 vfs_context_t ctx = vfs_context_current();
4554 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4555 vnode_put(vp);
4556 error = EBADF;
4557 goto outdrop;
4558 }
4559
4560 error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
4561
4562 vnode_put(vp);
4563 break;
4564 }
4565
4566 /*
4567 * SPI (private) for indicating to a filesystem that subsequent writes to
4568 * the open FD will written to the Fastflow.
4569 */
4570 case F_SET_GREEDY_MODE:
4571 /* intentionally drop through to the same handler as F_SETSTATIC.
4572 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4573 */
4574
4575 /*
4576 * SPI (private) for indicating to a filesystem that subsequent writes to
4577 * the open FD will represent static content.
4578 */
4579 case F_SETSTATICCONTENT: {
4580 caddr_t ioctl_arg = NULL;
4581
4582 if (uap->arg) {
4583 ioctl_arg = (caddr_t) 1;
4584 }
4585
4586 if (fp->f_type != DTYPE_VNODE) {
4587 error = EBADF;
4588 goto out;
4589 }
4590 vp = (struct vnode *)fp_get_data(fp);
4591 proc_fdunlock(p);
4592
4593 error = vnode_getwithref(vp);
4594 if (error) {
4595 error = ENOENT;
4596 goto outdrop;
4597 }
4598
4599 /* Only go forward if you have write access */
4600 vfs_context_t ctx = vfs_context_current();
4601 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4602 vnode_put(vp);
4603 error = EBADF;
4604 goto outdrop;
4605 }
4606
4607 error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
4608 (void)vnode_put(vp);
4609
4610 break;
4611 }
4612
4613 /*
4614 * SPI (private) for indicating to the lower level storage driver that the
4615 * subsequent writes should be of a particular IO type (burst, greedy, static),
4616 * or other flavors that may be necessary.
4617 */
4618 case F_SETIOTYPE: {
4619 caddr_t param_ptr;
4620 uint32_t param;
4621
4622 if (uap->arg) {
4623 /* extract 32 bits of flags from userland */
4624 param_ptr = (caddr_t) uap->arg;
4625 param = (uint32_t) param_ptr;
4626 } else {
4627 /* If no argument is specified, error out */
4628 error = EINVAL;
4629 goto out;
4630 }
4631
4632 /*
4633 * Validate the different types of flags that can be specified:
4634 * all of them are mutually exclusive for now.
4635 */
4636 switch (param) {
4637 case F_IOTYPE_ISOCHRONOUS:
4638 break;
4639
4640 default:
4641 error = EINVAL;
4642 goto out;
4643 }
4644
4645
4646 if (fp->f_type != DTYPE_VNODE) {
4647 error = EBADF;
4648 goto out;
4649 }
4650 vp = (struct vnode *)fp_get_data(fp);
4651 proc_fdunlock(p);
4652
4653 error = vnode_getwithref(vp);
4654 if (error) {
4655 error = ENOENT;
4656 goto outdrop;
4657 }
4658
4659 /* Only go forward if you have write access */
4660 vfs_context_t ctx = vfs_context_current();
4661 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4662 vnode_put(vp);
4663 error = EBADF;
4664 goto outdrop;
4665 }
4666
4667 error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
4668 (void)vnode_put(vp);
4669
4670 break;
4671 }
4672
4673 /*
4674 * Set the vnode pointed to by 'fd'
4675 * and tag it as the (potentially future) backing store
4676 * for another filesystem
4677 */
4678 case F_SETBACKINGSTORE: {
4679 if (fp->f_type != DTYPE_VNODE) {
4680 error = EBADF;
4681 goto out;
4682 }
4683
4684 vp = (struct vnode *)fp_get_data(fp);
4685
4686 if (vp->v_tag != VT_HFS) {
4687 error = EINVAL;
4688 goto out;
4689 }
4690 proc_fdunlock(p);
4691
4692 if (vnode_getwithref(vp)) {
4693 error = ENOENT;
4694 goto outdrop;
4695 }
4696
4697 /* only proceed if you have write access */
4698 vfs_context_t ctx = vfs_context_current();
4699 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4700 vnode_put(vp);
4701 error = EBADF;
4702 goto outdrop;
4703 }
4704
4705
4706 /* If arg != 0, set, otherwise unset */
4707 if (uap->arg) {
4708 error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
4709 } else {
4710 error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
4711 }
4712
4713 vnode_put(vp);
4714 break;
4715 }
4716
4717 /*
4718 * like F_GETPATH, but special semantics for
4719 * the mobile time machine handler.
4720 */
4721 case F_GETPATH_MTMINFO: {
4722 char *pathbufp;
4723 int pathlen;
4724
4725 if (fp->f_type != DTYPE_VNODE) {
4726 error = EBADF;
4727 goto out;
4728 }
4729 vp = (struct vnode *)fp_get_data(fp);
4730 proc_fdunlock(p);
4731
4732 pathlen = MAXPATHLEN;
4733 pathbufp = zalloc(ZV_NAMEI);
4734
4735 if ((error = vnode_getwithref(vp)) == 0) {
4736 int backingstore = 0;
4737
4738 /* Check for error from vn_getpath before moving on */
4739 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4740 if (vp->v_tag == VT_HFS) {
4741 error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4742 }
4743 (void)vnode_put(vp);
4744
4745 if (error == 0) {
4746 error = copyout((caddr_t)pathbufp, argp, pathlen);
4747 }
4748 if (error == 0) {
4749 /*
4750 * If the copyout was successful, now check to ensure
4751 * that this vnode is not a BACKINGSTORE vnode. mtmd
4752 * wants the path regardless.
4753 */
4754 if (backingstore) {
4755 error = EBUSY;
4756 }
4757 }
4758 } else {
4759 (void)vnode_put(vp);
4760 }
4761 }
4762
4763 zfree(ZV_NAMEI, pathbufp);
4764 goto outdrop;
4765 }
4766
4767 case F_RECYCLE: {
4768 #if !DEBUG && !DEVELOPMENT
4769 bool allowed = false;
4770
4771 //
4772 // non-debug and non-development kernels have restrictions
		// on who can call this fcntl. the process has to be marked
4774 // with the dataless-manipulator entitlement and either the
4775 // process or thread have to be marked rapid-aging.
4776 //
4777 if (!vfs_context_is_dataless_manipulator(&context)) {
4778 error = EPERM;
4779 goto out;
4780 }
4781
4782 proc_t proc = vfs_context_proc(&context);
4783 if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4784 allowed = true;
4785 } else {
4786 thread_t thr = vfs_context_thread(&context);
4787 if (thr) {
4788 struct uthread *ut = get_bsdthread_info(thr);
4789
4790 if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4791 allowed = true;
4792 }
4793 }
4794 }
4795 if (!allowed) {
4796 error = EPERM;
4797 goto out;
4798 }
4799 #endif
4800
4801 if (fp->f_type != DTYPE_VNODE) {
4802 error = EBADF;
4803 goto out;
4804 }
4805 vp = (struct vnode *)fp_get_data(fp);
4806 proc_fdunlock(p);
4807
4808 vnode_recycle(vp);
4809 break;
4810 }
4811
4812 default:
4813 /*
	 * This is an fcntl() that we do not recognize at this level;
4815 * if this is a vnode, we send it down into the VNOP_IOCTL
4816 * for this vnode; this can include special devices, and will
4817 * effectively overload fcntl() to send ioctl()'s.
4818 */
4819 if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
4820 error = EINVAL;
4821 goto out;
4822 }
4823
4824 /* Catch any now-invalid fcntl() selectors */
4825 switch (cmd) {
4826 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
4827 case (int)FSIOC_FIOSEEKHOLE:
4828 case (int)FSIOC_FIOSEEKDATA:
4829 case (int)FSIOC_CAS_BSDFLAGS:
4830 case HFS_GET_BOOT_INFO:
4831 case HFS_SET_BOOT_INFO:
4832 case FIOPINSWAP:
4833 case F_MARKDEPENDENCY:
4834 case TIOCREVOKE:
4835 case TIOCREVOKECLEAR:
4836 error = EINVAL;
4837 goto out;
4838 default:
4839 break;
4840 }
4841
4842 if (fp->f_type != DTYPE_VNODE) {
4843 error = EBADF;
4844 goto out;
4845 }
4846 vp = (struct vnode *)fp_get_data(fp);
4847 proc_fdunlock(p);
4848
4849 if ((error = vnode_getwithref(vp)) == 0) {
4850 #define STK_PARAMS 128
4851 char stkbuf[STK_PARAMS] = {0};
4852 unsigned int size;
4853 caddr_t data, memp;
4854 /*
4855 * For this to work properly, we have to copy in the
4856 * ioctl() cmd argument if there is one; we must also
4857 * check that a command parameter, if present, does
4858 * not exceed the maximum command length dictated by
4859 * the number of bits we have available in the command
4860 * to represent a structure length. Finally, we have
4861 * to copy the results back out, if it is that type of
4862 * ioctl().
4863 */
4864 size = IOCPARM_LEN(cmd);
4865 if (size > IOCPARM_MAX) {
4866 (void)vnode_put(vp);
4867 error = EINVAL;
4868 break;
4869 }
4870
4871 memp = NULL;
4872 if (size > sizeof(stkbuf)) {
4873 memp = (caddr_t)kalloc_data(size, Z_WAITOK);
4874 if (memp == 0) {
4875 (void)vnode_put(vp);
4876 error = ENOMEM;
4877 goto outdrop;
4878 }
4879 data = memp;
4880 } else {
4881 data = &stkbuf[0];
4882 }
4883
4884 if (cmd & IOC_IN) {
4885 if (size) {
4886 /* structure */
4887 error = copyin(argp, data, size);
4888 if (error) {
4889 (void)vnode_put(vp);
4890 if (memp) {
4891 kfree_data(memp, size);
4892 }
4893 goto outdrop;
4894 }
4895
4896 /* Bzero the section beyond that which was needed */
4897 if (size <= sizeof(stkbuf)) {
4898 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
4899 }
4900 } else {
4901 /* int */
4902 if (is64bit) {
4903 *(user_addr_t *)data = argp;
4904 } else {
4905 *(uint32_t *)data = (uint32_t)argp;
4906 }
4907 };
4908 } else if ((cmd & IOC_OUT) && size) {
4909 /*
4910 * Zero the buffer so the user always
4911 * gets back something deterministic.
4912 */
4913 bzero(data, size);
4914 } else if (cmd & IOC_VOID) {
4915 if (is64bit) {
4916 *(user_addr_t *)data = argp;
4917 } else {
4918 *(uint32_t *)data = (uint32_t)argp;
4919 }
4920 }
4921
4922 error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
4923
4924 (void)vnode_put(vp);
4925
4926 /* Copy any output data to user */
4927 if (error == 0 && (cmd & IOC_OUT) && size) {
4928 error = copyout(data, argp, size);
4929 }
4930 if (memp) {
4931 kfree_data(memp, size);
4932 }
4933 }
4934 break;
4935 }
4936
4937 outdrop:
4938 return sys_fcntl_outdrop(p, fd, fp, vp, error);
4939
4940 out:
4941 return sys_fcntl_out(p, fd, fp, error);
4942 }
4943
4944
4945 /*
4946 * sys_close
4947 *
4948 * Description: The implementation of the close(2) system call
4949 *
4950 * Parameters: p Process in whose per process file table
4951 * the close is to occur
4952 * uap->fd fd to be closed
4953 * retval <unused>
4954 *
4955 * Returns: 0 Success
4956 * fp_lookup:EBADF Bad file descriptor
4957 * fp_guard_exception:??? Guarded file descriptor
4958 * close_internal:EBADF
4959 * close_internal:??? Anything returnable by a per-fileops
4960 * close function
4961 */
4962 int
sys_close(proc_t p,struct close_args * uap,__unused int32_t * retval)4963 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
4964 {
4965 __pthread_testcancel(1);
4966 return close_nocancel(p, uap->fd);
4967 }
4968
int
sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
{
	/* Same as close(2), but without the pthread cancellation check. */
	return close_nocancel(p, uap->fd);
}
4974
/*
 * Common close path: validates fd, raises a guard exception for
 * close-guarded descriptors, otherwise closes the file.
 */
int
close_nocancel(proc_t p, int fd)
{
	struct fileproc *fp;

	AUDIT_SYSCLOSE(p, fd);

	proc_fdlock(p);
	/* Look up the fileproc without taking an iocount; we hold the fdlock. */
	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
		proc_fdunlock(p);
		return EBADF;
	}

	/* Guarded fds may not be closed directly; deliver the guard exception. */
	if (fp_isguarded(fp, GUARD_CLOSE)) {
		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
		proc_fdunlock(p);
		return error;
	}

	/* fp_close_and_unlock() consumes the fdlock on all paths. */
	return fp_close_and_unlock(p, fd, fp, 0);
}
4996
4997
4998 /*
4999 * fstat
5000 *
5001 * Description: Return status information about a file descriptor.
5002 *
5003 * Parameters: p The process doing the fstat
5004 * fd The fd to stat
5005 * ub The user stat buffer
5006 * xsecurity The user extended security
5007 * buffer, or 0 if none
5008 * xsecurity_size The size of xsecurity, or 0
5009 * if no xsecurity
5010 * isstat64 Flag to indicate 64 bit version
5011 * for inode size, etc.
5012 *
5013 * Returns: 0 Success
5014 * EBADF
5015 * EFAULT
5016 * fp_lookup:EBADF Bad file descriptor
5017 * vnode_getwithref:???
5018 * copyout:EFAULT
5019 * vnode_getwithref:???
5020 * vn_stat:???
5021 * soo_stat:???
5022 * pipe_stat:???
5023 * pshm_stat:???
5024 * kqueue_stat:???
5025 *
5026 * Notes: Internal implementation for all other fstat() related
5027 * functions
5028 *
5029 * XXX switch on node type is bogus; need a stat in struct
5030 * XXX fileops instead.
5031 */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
    user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* kernel-side stat result; 32- or 64-bit inode form per isstat64 */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* user-ABI layout of the result, selected by process bitness and isstat64 */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	/* take an iocount reference on the fileproc; dropped at "out" */
	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = (caddr_t)fp_get_data(fp);
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/* dispatch on file type (see XXX note above re: fileops stat) */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
				    fp->fp_glob->fg_cred);
			} else {
				/* vn_stat may allocate fsec; freed at "out" */
				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put((vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		/*
		 * Zero the spare/padding fields, then munge the result into
		 * the layout this process's ABI expects before the copyout.
		 */
		if (isstat64 != 0) {
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat(&source.sb, &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(&source.sb, &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			/* no filesec data: report a zero size back to the caller */
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/* if the caller supplied enough room, copy out to it */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	fp_drop(p, fd, fp, 0);
	if (fsec != NULL) {
		kauth_filesec_free(fsec);
	}
	return error;
}
5175
5176
5177 /*
5178 * sys_fstat_extended
5179 *
5180 * Description: Extended version of fstat supporting returning extended
5181 * security information
5182 *
5183 * Parameters: p The process doing the fstat
5184 * uap->fd The fd to stat
5185 * uap->ub The user stat buffer
5186 * uap->xsecurity The user extended security
5187 * buffer, or 0 if none
5188 * uap->xsecurity_size The size of xsecurity, or 0
5189 *
5190 * Returns: 0 Success
5191 * !0 Errno (see fstat)
5192 */
int
sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
{
	/* 32-bit-inode fstat variant that also returns extended security info. */
	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
}
5198
5199
5200 /*
5201 * sys_fstat
5202 *
5203 * Description: Get file status for the file associated with fd
5204 *
5205 * Parameters: p The process doing the fstat
5206 * uap->fd The fd to stat
5207 * uap->ub The user stat buffer
5208 *
5209 * Returns: 0 Success
5210 * !0 Errno (see fstat)
5211 */
5212 int
sys_fstat(proc_t p,struct fstat_args * uap,__unused int32_t * retval)5213 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
5214 {
5215 return fstat(p, uap->fd, uap->ub, 0, 0, 0);
5216 }
5217
5218
5219 /*
5220 * sys_fstat64_extended
5221 *
5222 * Description: Extended version of fstat64 supporting returning extended
5223 * security information
5224 *
5225 * Parameters: p The process doing the fstat
5226 * uap->fd The fd to stat
5227 * uap->ub The user stat buffer
5228 * uap->xsecurity The user extended security
5229 * buffer, or 0 if none
5230 * uap->xsecurity_size The size of xsecurity, or 0
5231 *
5232 * Returns: 0 Success
5233 * !0 Errno (see fstat)
5234 */
int
sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
{
	/* 64-bit-inode fstat variant that also returns extended security info. */
	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
}
5240
5241
5242 /*
5243 * sys_fstat64
5244 *
5245 * Description: Get 64 bit version of the file status for the file associated
5246 * with fd
5247 *
5248 * Parameters: p The process doing the fstat
5249 * uap->fd The fd to stat
5250 * uap->ub The user stat buffer
5251 *
5252 * Returns: 0 Success
5253 * !0 Errno (see fstat)
5254 */
5255 int
sys_fstat64(proc_t p,struct fstat64_args * uap,__unused int32_t * retval)5256 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
5257 {
5258 return fstat(p, uap->fd, uap->ub, 0, 0, 1);
5259 }
5260
5261
5262 /*
5263 * sys_fpathconf
5264 *
5265 * Description: Return pathconf information about a file descriptor.
5266 *
5267 * Parameters: p Process making the request
5268 * uap->fd fd to get information about
5269 * uap->name Name of information desired
5270 * retval Pointer to the call return area
5271 *
5272 * Returns: 0 Success
5273 * EINVAL
5274 * fp_lookup:EBADF Bad file descriptor
5275 * vnode_getwithref:???
5276 * vn_pathconf:???
5277 *
5278 * Implicit returns:
5279 * *retval (modified) Returned information (numeric)
5280 */
5281 int
sys_fpathconf(proc_t p,struct fpathconf_args * uap,int32_t * retval)5282 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5283 {
5284 int fd = uap->fd;
5285 struct fileproc *fp;
5286 struct vnode *vp;
5287 int error = 0;
5288 file_type_t type;
5289
5290
5291 AUDIT_ARG(fd, uap->fd);
5292 if ((error = fp_lookup(p, fd, &fp, 0))) {
5293 return error;
5294 }
5295 type = fp->f_type;
5296
5297 switch (type) {
5298 case DTYPE_SOCKET:
5299 if (uap->name != _PC_PIPE_BUF) {
5300 error = EINVAL;
5301 goto out;
5302 }
5303 *retval = PIPE_BUF;
5304 error = 0;
5305 goto out;
5306
5307 case DTYPE_PIPE:
5308 if (uap->name != _PC_PIPE_BUF) {
5309 error = EINVAL;
5310 goto out;
5311 }
5312 *retval = PIPE_BUF;
5313 error = 0;
5314 goto out;
5315
5316 case DTYPE_VNODE:
5317 vp = (struct vnode *)fp_get_data(fp);
5318
5319 if ((error = vnode_getwithref(vp)) == 0) {
5320 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5321
5322 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5323
5324 (void)vnode_put(vp);
5325 }
5326 goto out;
5327
5328 default:
5329 error = EINVAL;
5330 goto out;
5331 }
5332 /*NOTREACHED*/
5333 out:
5334 fp_drop(p, fd, fp, 0);
5335 return error;
5336 }
5337
5338 /*
5339 * sys_flock
5340 *
5341 * Description: Apply an advisory lock on a file descriptor.
5342 *
5343 * Parameters: p Process making request
5344 * uap->fd fd on which the lock is to be
5345 * attempted
5346 * uap->how (Un)Lock bits, including type
5347 * retval Pointer to the call return area
5348 *
5349 * Returns: 0 Success
5350 * fp_getfvp:EBADF Bad file descriptor
5351 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5352 * vnode_getwithref:???
5353 * VNOP_ADVLOCK:???
5354 *
5355 * Implicit returns:
5356 * *retval (modified) Size of dtable
5357 *
5358 * Notes: Just attempt to get a record lock of the requested type on
5359 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5360 */
int
sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
{
	int fd = uap->fd;
	int how = uap->how;
	struct fileproc *fp;
	struct vnode *vp;
	struct flock lf;
	vfs_context_t ctx = vfs_context_current();
	int error = 0;

	AUDIT_ARG(fd, uap->fd);
	/* flock() only applies to vnodes; fp_getfvp rejects other fd types */
	if ((error = fp_getfvp(p, fd, &fp, &vp))) {
		return error;
	}
	if ((error = vnode_getwithref(vp))) {
		goto out1;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* lock the whole file: SEEK_SET origin, zero start, zero (to-EOF) length */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
		goto out;
	}
	if (how & LOCK_EX) {
		lf.l_type = F_WRLCK;
	} else if (how & LOCK_SH) {
		lf.l_type = F_RDLCK;
	} else {
		/* neither exclusive nor shared was requested */
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF
	error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
	if (error) {
		goto out;
	}
#endif
	/* LOCK_NB: fail instead of waiting for a conflicting lock (no F_WAIT) */
	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
	    ctx, NULL);
	if (!error) {
		/* remember this fileglob holds a lock so it is released on close */
		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
	}
out:
	(void)vnode_put(vp);
out1:
	fp_drop(p, fd, fp, 0);
	return error;
}
5415
5416 /*
5417 * sys_fileport_makeport
5418 *
5419 * Description: Obtain a Mach send right for a given file descriptor.
5420 *
5421 * Parameters: p Process calling fileport
5422 * uap->fd The fd to reference
5423 * uap->portnamep User address at which to place port name.
5424 *
5425 * Returns: 0 Success.
5426 * EBADF Bad file descriptor.
5427 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5428 * EFAULT Address at which to store port name is not valid.
5429 * EAGAIN Resource shortage.
5430 *
5431 * Implicit returns:
5432 * On success, name of send right is stored at user-specified address.
5433 */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	proc_fdlock(p);
	/* locked==1: we already hold the fdlock */
	err = fp_lookup(p, fd, &fp, 1);
	if (err != 0) {
		goto out_unlock;
	}

	fg = fp->fp_glob;
	/* reject fileglobs that may not be transported over IPC */
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	/* fileport-guarded fds may not be exported; raise the guard exception */
	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		/* undo the fg_ref taken above */
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry. Deallocates port on failure. */
	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		/* name is valid here, so "out" will deallocate the entry */
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(&fg->fg_lock);

	fp_drop(p, fd, fp, 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, 0);
	}

	return err;
}
5511
void
fileport_releasefg(struct fileglob *fg)
{
	/* Drop the fileglob reference held on behalf of a fileport. */
	(void)fg_drop(PROC_NULL, fg);
}
5517
5518 /*
5519 * fileport_makefd
5520 *
5521 * Description: Obtain the file descriptor for a given Mach send right.
5522 *
5523 * Returns: 0 Success
5524 * EINVAL Invalid Mach port name, or port is not for a file.
5525 * fdalloc:EMFILE
5526 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5527 *
5528 * Implicit returns:
5529 * *retval (modified) The new descriptor
5530 */
int
fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
{
	struct fileglob *fg;
	struct fileproc *fp = FILEPROC_NULL;
	int fd;
	int err;

	/* recover the fileglob backing this port, if it is a fileport */
	fg = fileport_port_to_fileglob(port);
	if (fg == NULL) {
		err = EINVAL;
		goto out;
	}

	fp = fileproc_alloc_init();

	proc_fdlock(p);
	err = fdalloc(p, 0, &fd);
	if (err != 0) {
		proc_fdunlock(p);
		goto out;
	}
	/* e.g. FP_CLOEXEC from sys_fileport_makefd */
	if (fp_flags) {
		fp->fp_flags |= fp_flags;
	}

	/* the new fileproc shares the port's fileglob; take our own reference */
	fp->fp_glob = fg;
	fg_ref(p, fg);

	/* publish the descriptor in the fd table, then unlock */
	procfdtbl_releasefd(p, fd, fp);
	proc_fdunlock(p);

	*retval = fd;
	err = 0;
out:
	/* on failure, free the fileproc that was never installed */
	if ((fp != NULL) && (0 != err)) {
		fileproc_free(fp);
	}

	return err;
}
5572
5573 /*
5574 * sys_fileport_makefd
5575 *
5576 * Description: Obtain the file descriptor for a given Mach send right.
5577 *
5578 * Parameters: p Process calling fileport
5579 * uap->port Name of send right to file port.
5580 *
5581 * Returns: 0 Success
5582 * EINVAL Invalid Mach port name, or port is not for a file.
5583 * fdalloc:EMFILE
5584 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5585 *
5586 * Implicit returns:
5587 * *retval (modified) The new descriptor
5588 */
5589 int
sys_fileport_makefd(proc_t p,struct fileport_makefd_args * uap,int32_t * retval)5590 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5591 {
5592 ipc_port_t port = IPC_PORT_NULL;
5593 mach_port_name_t send = uap->port;
5594 kern_return_t res;
5595 int err;
5596
5597 res = ipc_object_copyin(get_task_ipcspace(p->task),
5598 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5599
5600 if (res == KERN_SUCCESS) {
5601 err = fileport_makefd(p, port, FP_CLOEXEC, retval);
5602 } else {
5603 err = EINVAL;
5604 }
5605
5606 if (IPC_PORT_NULL != port) {
5607 ipc_port_release_send(port);
5608 }
5609
5610 return err;
5611 }
5612
5613
5614 #pragma mark fileops wrappers
5615
5616 /*
5617 * fo_read
5618 *
5619 * Description: Generic fileops read indirected through the fileops pointer
5620 * in the fileproc structure
5621 *
5622 * Parameters: fp fileproc structure pointer
5623 * uio user I/O structure pointer
5624 * flags FOF_ flags
5625 * ctx VFS context for operation
5626 *
5627 * Returns: 0 Success
5628 * !0 Errno from read
5629 */
5630 int
fo_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5631 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5632 {
5633 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5634 }
5635
int
fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
#pragma unused(fp, uio, flags, ctx)
	/* Stub for file types that do not support read. */
	return ENXIO;
}
5642
5643
5644 /*
5645 * fo_write
5646 *
5647 * Description: Generic fileops write indirected through the fileops pointer
5648 * in the fileproc structure
5649 *
5650 * Parameters: fp fileproc structure pointer
5651 * uio user I/O structure pointer
5652 * flags FOF_ flags
5653 * ctx VFS context for operation
5654 *
5655 * Returns: 0 Success
5656 * !0 Errno from write
5657 */
5658 int
fo_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5659 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5660 {
5661 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5662 }
5663
int
fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
#pragma unused(fp, uio, flags, ctx)
	/* Stub for file types that do not support write. */
	return ENXIO;
}
5670
5671
5672 /*
5673 * fo_ioctl
5674 *
5675 * Description: Generic fileops ioctl indirected through the fileops pointer
5676 * in the fileproc structure
5677 *
5678 * Parameters: fp fileproc structure pointer
5679 * com ioctl command
5680 * data pointer to internalized copy
5681 * of user space ioctl command
5682 * parameter data in kernel space
5683 * ctx VFS context for operation
5684 *
5685 * Returns: 0 Success
5686 * !0 Errno from ioctl
5687 *
5688 * Locks: The caller is assumed to have held the proc_fdlock; this
5689 * function releases and reacquires this lock. If the caller
5690 * accesses data protected by this lock prior to calling this
5691 * function, it will need to revalidate/reacquire any cached
5692 * protected data obtained prior to the call.
5693 */
int
fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
{
	int error;

	/*
	 * The fdlock is released across the indirect call and retaken
	 * afterwards; per the contract documented above, callers must
	 * revalidate any state they cached under the lock.
	 */
	proc_fdunlock(vfs_context_proc(ctx));
	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
	proc_fdlock(vfs_context_proc(ctx));
	return error;
}
5704
int
fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
{
#pragma unused(fp, com, data, ctx)
	/* Stub for file types that do not support ioctl. */
	return ENOTTY;
}
5711
5712
5713 /*
5714 * fo_select
5715 *
5716 * Description: Generic fileops select indirected through the fileops pointer
5717 * in the fileproc structure
5718 *
5719 * Parameters: fp fileproc structure pointer
5720 * which select which
5721 * wql pointer to wait queue list
5722 * ctx VFS context for operation
5723 *
5724 * Returns: 0 Success
5725 * !0 Errno from select
5726 */
5727 int
fo_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)5728 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5729 {
5730 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5731 }
5732
int
fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
#pragma unused(fp, which, wql, ctx)
	/* Stub for file types that do not support select. */
	return ENOTSUP;
}
5739
5740
5741 /*
5742 * fo_close
5743 *
5744 * Description: Generic fileops close indirected through the fileops pointer
5745 * in the fileproc structure
5746 *
5747 * Parameters: fp fileproc structure pointer for
5748 * file to close
5749 * ctx VFS context for operation
5750 *
5751 * Returns: 0 Success
5752 * !0 Errno from close
5753 */
5754 int
fo_close(struct fileglob * fg,vfs_context_t ctx)5755 fo_close(struct fileglob *fg, vfs_context_t ctx)
5756 {
5757 return (*fg->fg_ops->fo_close)(fg, ctx);
5758 }
5759
5760
5761 /*
5762 * fo_drain
5763 *
5764 * Description: Generic fileops kqueue filter indirected through the fileops
5765 * pointer in the fileproc structure
5766 *
5767 * Parameters: fp fileproc structure pointer
5768 * ctx VFS context for operation
5769 *
5770 * Returns: 0 Success
5771 * !0 errno from drain
5772 */
5773 int
fo_drain(struct fileproc * fp,vfs_context_t ctx)5774 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5775 {
5776 return (*fp->f_ops->fo_drain)(fp, ctx);
5777 }
5778
int
fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
{
#pragma unused(fp, ctx)
	/* Stub for file types that do not support drain. */
	return ENOTSUP;
}
5785
5786
5787 /*
5788 * fo_kqfilter
5789 *
5790 * Description: Generic fileops kqueue filter indirected through the fileops
5791 * pointer in the fileproc structure
5792 *
5793 * Parameters: fp fileproc structure pointer
5794 * kn pointer to knote to filter on
5795 *
5796 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5797 * 0 Filter is not active
5798 * !0 Filter is active
5799 */
5800 int
fo_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)5801 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5802 {
5803 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5804 }
5805
int
fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
#pragma unused(fp, kev)
	/*
	 * Stub for file types that do not support kqueue filtering:
	 * record the error on the knote and report the filter inactive.
	 */
	knote_set_error(kn, ENOTSUP);
	return 0;
}
5813