xref: /xnu-8019.80.24/bsd/kern/kern_descrip.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
67  */
68 /*
69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70  * support for mandatory and extensible security protections.  This notice
71  * is included in support of clause 2.2 (b) of the Apple Public License,
72  * Version 2.0.
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110 
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116 
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119 
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124 
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129 
130 #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
131 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
132     mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
133 void ipc_port_release_send(ipc_port_t);
134 
135 void fileport_releasefg(struct fileglob *fg);
136 
137 /* flags for fp_close_and_unlock */
138 #define FD_DUP2RESV 1
139 
140 /* We don't want these exported */
141 
142 __private_extern__
143 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
144 
145 /* Conflict wait queue for when selects collide (opaque type) */
146 extern struct waitq select_conflict_queue;
147 
148 #define f_flag fp_glob->fg_flag
149 #define f_type fp_glob->fg_ops->fo_type
150 #define f_cred fp_glob->fg_cred
151 #define f_ops fp_glob->fg_ops
152 #define f_offset fp_glob->fg_offset
153 
154 static SECURITY_READ_ONLY_LATE(zone_t) fp_zone;
155 ZONE_INIT(&fp_zone, "fileproc", sizeof(struct fileproc),
156     ZC_ZFREE_CLEARMEM, ZONE_ID_FILEPROC, NULL);
157 
158 ZONE_DECLARE(fg_zone, "fileglob", sizeof(struct fileglob), ZC_ZFREE_CLEARMEM);
159 /*
160  * If you need accounting for KM_OFILETABL consider using
161  * KALLOC_HEAP_DEFINE to define a view.
162  */
163 #define KM_OFILETABL KHEAP_DEFAULT
164 
165 /*
166  * Descriptor management.
167  */
168 int nfiles;                     /* actual number of open files */
169 /*
170  * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
171  */
172 static const struct fileops uninitops;
173 
174 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
175 static LCK_GRP_DECLARE(file_lck_grp, "file");
176 
177 
178 #pragma mark fileglobs
179 
180 /*!
181  * @function fg_free
182  *
183  * @brief
184  * Free a file structure.
185  */
186 static void
fg_free(struct fileglob * fg)187 fg_free(struct fileglob *fg)
188 {
189 	os_atomic_dec(&nfiles, relaxed);
190 
191 	if (fg->fg_vn_data) {
192 		fg_vn_data_free(fg->fg_vn_data);
193 		fg->fg_vn_data = NULL;
194 	}
195 
196 	kauth_cred_t cred = fg->fg_cred;
197 	if (IS_VALID_CRED(cred)) {
198 		kauth_cred_unref(&cred);
199 		fg->fg_cred = NOCRED;
200 	}
201 	lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
202 
203 #if CONFIG_MACF && CONFIG_VNGUARD
204 	vng_file_label_destroy(fg);
205 #endif
206 	zfree(fg_zone, fg);
207 }
208 
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
	/*
	 * Take an additional reference on a fileglob.  The caller must
	 * hold the proc_fdlock of `p`; this is asserted only on
	 * DEBUG/DEVELOPMENT kernels, and `p` is otherwise unused.
	 */
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p;
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
220 
/*
 * Drop a fileglob reference that is known not to be the last one;
 * os_ref_release_live_raw() asserts the count does not hit zero.
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
226 
/*
 * fg_drop
 *
 * Description:	Release one reference on a fileglob.  If this was the
 *		last reference, fo_close() is invoked on the fg and the
 *		fg is freed.  When `p` is non-NULL, has taken POSIX
 *		advisory locks (P_LADVLOCK), and the fg is vnode-backed,
 *		those locks are released first.
 *
 * Parameters:	p	Process dropping the reference; may be NULL
 *			(e.g. for descriptors in transit in a message)
 *		fg	fileglob to drop; NULL is a no-op
 *
 * Returns:	0	Success
 *		!0	Error from vnode_getwithref() or fo_close()
 */
int
fg_drop(proc_t p, struct fileglob *fg)
{
	struct vnode *vp;
	struct vfs_context context;
	int error = 0;

	if (fg == NULL) {
		return 0;
	}

	/* Set up context with cred stashed in fg */
	if (p == current_proc()) {
		context.vc_thread = current_thread();
	} else {
		context.vc_thread = NULL;
	}
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
	    (p->p_ladvflag & P_LADVLOCK)) {
		struct flock lf = {
			.l_whence = SEEK_SET,
			.l_type = F_UNLCK,
		};

		vp = (struct vnode *)fg_get_data(fg);
		if ((error = vnode_getwithref(vp)) == 0) {
			/* F_POSIX limits the unlock to POSIX-style locks only */
			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
			(void)vnode_put(vp);
		}
	}

	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
		/*
		 * Since we ensure that fg->fg_ops is always initialized,
		 * it is safe to invoke fo_close on the fg
		 */
		error = fo_close(fg, &context);

		fg_free(fg);
	}

	return error;
}
280 
/*
 * fg_set_data
 *
 * Store the type-specific payload pointer (vnode, socket, pipe, ...)
 * into fg->fg_data.  On pointer-authentication hardware a non-NULL
 * pointer is signed with a discriminator blended from the storage
 * address and the descriptor type, so a fg_data value cannot be
 * replayed into a fileglob of a different type or location.
 */
inline
void
fg_set_data(
	struct fileglob *fg,
	void *fg_data)
{
	uintptr_t *store = &fg->fg_data;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_sign_unauthenticated(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	*store = (uintptr_t)fg_data;
}
302 
/*
 * fg_get_data_volatile
 *
 * Read the type-specific payload pointer out of fg->fg_data.  On
 * pointer-authentication hardware, a non-NULL value is authenticated
 * against the same blended discriminator used by fg_set_data();
 * NULL passes through untouched.
 */
inline
void *
fg_get_data_volatile(struct fileglob *fg)
{
	uintptr_t *store = &fg->fg_data;
	void *fg_data = (void *)*store;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_auth_data(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	return fg_data;
}
323 
324 
325 bool
fg_sendable(struct fileglob * fg)326 fg_sendable(struct fileglob *fg)
327 {
328 	switch (FILEGLOB_DTYPE(fg)) {
329 	case DTYPE_VNODE:
330 	case DTYPE_SOCKET:
331 	case DTYPE_PIPE:
332 	case DTYPE_PSXSHM:
333 	case DTYPE_NETPOLICY:
334 		return (fg->fg_lflags & FG_CONFINED) == 0;
335 
336 	default:
337 		return false;
338 	}
339 }
340 
341 #pragma mark file descriptor table (static helpers)
342 
/*
 * Mark fd slot `fd` as reserved (in flux): no fileproc is installed
 * yet, but the slot must not be handed out to anyone else.
 * Caller holds the proc_fdlock.
 */
static void
procfdtbl_reservefd(struct proc * p, int fd)
{
	p->p_fd.fd_ofiles[fd] = NULL;
	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
}
349 
350 void
procfdtbl_releasefd(struct proc * p,int fd,struct fileproc * fp)351 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
352 {
353 	if (fp != NULL) {
354 		p->p_fd.fd_ofiles[fd] = fp;
355 	}
356 	p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
357 	if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
358 		p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
359 		wakeup(&p->p_fd);
360 	}
361 }
362 
/*
 * Sleep until a reserved fd slot is released: the releaser clears
 * UF_RESVWAIT and issues a wakeup on &p->p_fd.  The proc_fdlock
 * (fd_lock) is dropped while asleep and reacquired by msleep().
 */
static void
procfdtbl_waitfd(struct proc * p, int fd)
{
	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
}
369 
370 static void
procfdtbl_clearfd(struct proc * p,int fd)371 procfdtbl_clearfd(struct proc * p, int fd)
372 {
373 	int waiting;
374 
375 	waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
376 	p->p_fd.fd_ofiles[fd] = NULL;
377 	p->p_fd.fd_ofileflags[fd] = 0;
378 	if (waiting == UF_RESVWAIT) {
379 		wakeup(&p->p_fd);
380 	}
381 }
382 
/*
 * fdrelse
 *
 * Description:	Inline utility function to free an fd in a filedesc
 *
 * Parameters:	p				Process whose fd table
 *						contains the fd
 *		fd				fd to free
 *
 * Returns:	void
 *
 * Locks:	Assumes proc_fdlock for p is held by the caller
 */
void
fdrelse(struct proc * p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	int nfd = 0;

	/* Track the lowest free slot for the next allocation. */
	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}
#if DIAGNOSTIC
	if (fd >= fdp->fd_afterlast) {
		panic("fdrelse: fd_afterlast inconsistent");
	}
#endif
	procfdtbl_clearfd(p, fd);

	/* Shrink fd_afterlast past any trailing empty, unreserved slots. */
	nfd = fdp->fd_afterlast;
	while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
	    !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
		nfd--;
	}
	fdp->fd_afterlast = nfd;

#if CONFIG_PROC_RESOURCE_LIMITS
	fdp->fd_nfiles_open--;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
424 
425 
426 /*
427  * finishdup
428  *
429  * Description:	Common code for dup, dup2, and fcntl(F_DUPFD).
430  *
431  * Parameters:	p				Process performing the dup
432  *		old				The fd to dup
433  *		new				The fd to dup it to
434  *		fp_flags			Flags to augment the new fp
435  *		retval				Pointer to the call return area
436  *
437  * Returns:	0				Success
438  *		EBADF
439  *		ENOMEM
440  *
441  * Implicit returns:
442  *		*retval (modified)		The new descriptor
443  *
444  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
445  *		the caller
446  *
447  * Notes:	This function may drop and reacquire this lock; it is unsafe
448  *		for a caller to assume that other state protected by the lock
449  *		has not been subsequently changed out from under it.
450  */
static int
finishdup(proc_t p, struct filedesc *fdp, int old, int new,
    fileproc_flags_t fp_flags, int32_t *retval)
{
	struct fileproc *nfp;
	struct fileproc *ofp;
#if CONFIG_MACF
	int error;
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	/* Source fd must still name a settled open file. */
	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
		fdrelse(p, new);
		return EBADF;
	}

#if CONFIG_MACF
	/* Give MAC policies a chance to veto the dup. */
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_dup(cred, ofp->fp_glob, new);
	kauth_cred_unref(&cred);

	if (error) {
		fdrelse(p, new);
		return error;
	}
#endif

	/* Take the fileglob reference while still under the fdlock. */
	fg_ref(p, ofp->fp_glob);

	/*
	 * Drop the fdlock across the (possibly blocking) fileproc
	 * allocation; `new` stays protected by its UF_RESERVED mark.
	 */
	proc_fdunlock(p);

	nfp = fileproc_alloc_init();

	if (fp_flags) {
		nfp->fp_flags |= fp_flags;
	}
	nfp->fp_glob = ofp->fp_glob;

	proc_fdlock(p);

#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("finishdup: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("finishdup: unreserved fileflags with new %d", new);
	}
#endif

	if (new >= fdp->fd_afterlast) {
		fdp->fd_afterlast = new + 1;
	}
	/* Publish the new fileproc and clear the reservation. */
	procfdtbl_releasefd(p, new, nfp);
	*retval = new;
	return 0;
}
511 
512 
513 #pragma mark file descriptor table (exported functions)
514 
/* Take the per-process cdir/rdir rwlock for reading. */
void
proc_dirs_lock_shared(proc_t p)
{
	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
}
520 
/* Release a shared hold on the per-process cdir/rdir rwlock. */
void
proc_dirs_unlock_shared(proc_t p)
{
	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
}
526 
/* Take the per-process cdir/rdir rwlock for writing. */
void
proc_dirs_lock_exclusive(proc_t p)
{
	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
}
532 
/* Release an exclusive hold on the per-process cdir/rdir rwlock. */
void
proc_dirs_unlock_exclusive(proc_t p)
{
	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
}
538 
539 /*
540  * proc_fdlock, proc_fdlock_spin
541  *
542  * Description:	Lock to control access to the per process struct fileproc
543  *		and struct filedesc
544  *
545  * Parameters:	p				Process to take the lock on
546  *
547  * Returns:	void
548  *
549  * Notes:	The lock is initialized in forkproc() and destroyed in
550  *		reap_child_process().
551  */
/* Acquire the per-process fd table mutex (may block). */
void
proc_fdlock(proc_t p)
{
	lck_mtx_lock(&p->p_fd.fd_lock);
}
557 
/* Acquire the per-process fd table mutex in spin mode. */
void
proc_fdlock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_fd.fd_lock);
}
563 
/* Assert ownership state of the fd table mutex (LCK_MTX_ASSERT_*). */
void
proc_fdlock_assert(proc_t p, int assertflags)
{
	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
}
569 
570 
571 /*
572  * proc_fdunlock
573  *
574  * Description:	Unlock the lock previously locked by a call to proc_fdlock()
575  *
576  * Parameters:	p				Process to drop the lock on
577  *
578  * Returns:	void
579  */
/* Release the per-process fd table mutex taken by proc_fdlock(). */
void
proc_fdunlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd.fd_lock);
}
585 
/*
 * fdt_available_locked
 *
 * Description:	Return true if at least `n` more descriptors can be
 *		opened by `p`: either there is headroom between the
 *		current table size and the RLIMIT_NOFILE limit, or
 *		enough free (non-reserved) slots exist in the table.
 *
 * Locks:	Caller holds the proc_fdlock.
 */
bool
fdt_available_locked(proc_t p, int n)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **fpp;
	char *flags;
	int i;
	int lim = proc_limitgetcur_nofile(p);

	/* Growth headroom alone may satisfy the request (n is consumed). */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
		return true;
	}
	/* Otherwise count free, unreserved slots from fd_freefile up. */
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
			return true;
		}
	}
	return false;
}
607 
608 
609 struct fdt_iterator
fdt_next(proc_t p,int fd,bool only_settled)610 fdt_next(proc_t p, int fd, bool only_settled)
611 {
612 	struct fdt_iterator it;
613 	struct filedesc *fdp = &p->p_fd;
614 	struct fileproc *fp;
615 	int nfds = fdp->fd_afterlast;
616 
617 	while (++fd < nfds) {
618 		fp = fdp->fd_ofiles[fd];
619 		if (fp == NULL || fp->fp_glob == NULL) {
620 			continue;
621 		}
622 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
623 			continue;
624 		}
625 		it.fdti_fd = fd;
626 		it.fdti_fp = fp;
627 		return it;
628 	}
629 
630 	it.fdti_fd = nfds;
631 	it.fdti_fp = NULL;
632 	return it;
633 }
634 
635 struct fdt_iterator
fdt_prev(proc_t p,int fd,bool only_settled)636 fdt_prev(proc_t p, int fd, bool only_settled)
637 {
638 	struct fdt_iterator it;
639 	struct filedesc *fdp = &p->p_fd;
640 	struct fileproc *fp;
641 
642 	while (--fd >= 0) {
643 		fp = fdp->fd_ofiles[fd];
644 		if (fp == NULL || fp->fp_glob == NULL) {
645 			continue;
646 		}
647 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
648 			continue;
649 		}
650 		it.fdti_fd = fd;
651 		it.fdti_fp = fp;
652 		return it;
653 	}
654 
655 	it.fdti_fd = -1;
656 	it.fdti_fp = NULL;
657 	return it;
658 }
659 
/*
 * Initialize the locks embedded in a new process's filedesc; called
 * from forkproc().  Counterpart of fdt_destroy().
 */
void
fdt_init(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
}
670 
/*
 * Destroy the filedesc locks set up by fdt_init(); called when the
 * process is reaped.
 */
void
fdt_destroy(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
}
681 
/*
 * fdt_exec
 *
 * Description:	Prune the fd table during exec/posix_spawn: unbind any
 *		workq/workloop servicing thread, drop all knotes, close
 *		every descriptor that must not survive the exec
 *		(FP_CLOEXEC; with POSIX_SPAWN_CLOEXEC_DEFAULT, everything
 *		not explicitly marked UF_INHERIT; or anything a MAC
 *		policy refuses to inherit), and release the per-process
 *		workq kqueue.
 *
 * Parameters:	p			Process being exec'ed
 *		posix_spawn_flags	POSIX_SPAWN_* flags (notably
 *					POSIX_SPAWN_CLOEXEC_DEFAULT)
 */
void
fdt_exec(proc_t p, short posix_spawn_flags)
{
	struct filedesc *fdp = &p->p_fd;
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);
	struct kqworkq *dealloc_kqwq = NULL;

	/*
	 * If the current thread is bound as a workq/workloop
	 * servicing thread, we need to unbind it first.
	 */
	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
	}

	/*
	 * Deallocate the knotes for this process
	 * and mark the tables non-existent so
	 * subsequent kqueue closes go faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	proc_fdlock(p);

	for (int i = fdp->fd_afterlast; i-- > 0;) {
		struct fileproc *fp = fdp->fd_ofiles[i];
		char *flagp = &fdp->fd_ofileflags[i];
		bool inherit_file = true;

		if (fp == FILEPROC_NULL) {
			continue;
		}

		/*
		 * no file descriptor should be in flux when in exec,
		 * because we stopped all other threads
		 */
		if (*flagp & ~UF_INHERIT) {
			panic("file %d/%p in flux during exec of %p", i, fp, p);
		}

		if (fp->fp_flags & FP_CLOEXEC) {
			inherit_file = false;
		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
		    !(*flagp & UF_INHERIT)) {
			/*
			 * Reverse the usual semantics of file descriptor
			 * inheritance - all of them should be closed
			 * except files marked explicitly as "inherit" and
			 * not marked close-on-exec.
			 */
			inherit_file = false;
#if CONFIG_MACF
		} else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
			inherit_file = false;
#endif
		}

		*flagp = 0; /* clear UF_INHERIT */

		if (!inherit_file) {
			/* fp_close_and_unlock() drops the fdlock; retake it */
			fp_close_and_unlock(p, i, fp, 0);
			proc_fdlock(p);
		}
	}

	/* release the per-process workq kq */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* Anything to free? */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
}
764 
765 
/*
 * fdt_fork
 *
 * Description:	Populate a child's filedesc from parent `p` at fork
 *		time.  Inherits references on the working and root
 *		directories, then duplicates the open-file table,
 *		skipping confined files, FP_CLOFORK descriptors and
 *		slots in flux (UF_RESERVED).
 *
 * Parameters:	newfdp		Zero-filled child filedesc to populate
 *		p		Parent process
 *		uth_cdir	Per-thread working directory to inherit
 *				instead of the process cwd, when non-NULL
 *
 * Returns:	0	Success
 *		EPERM	Could not re-reference the inherited chroot
 *			directory (would otherwise allow chroot escape)
 *		ENOMEM	Could not allocate the child's ofiles table
 */
int
fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **ofiles;
	char *ofileflags;
	int n_files, afterlast, freefile;
	vnode_t v_dir;
#if CONFIG_PROC_RESOURCE_LIMITS
	int fd_nfiles_open = 0;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	proc_fdlock(p);

	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
	newfdp->fd_cmask = fdp->fd_cmask;
#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	/*
	 * For both fd_cdir and fd_rdir make sure we get
	 * a valid reference... if we can't, than set
	 * set the pointer(s) to NULL in the child... this
	 * will keep us from using a non-referenced vp
	 * and allows us to do the vnode_rele only on
	 * a properly referenced vp
	 */
	if ((v_dir = fdp->fd_rdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_rdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_rdir == NULL) {
			/*
			 * We couldn't get a new reference on
			 * the chroot directory being
			 * inherited... this is fatal, since
			 * otherwise it would constitute an
			 * escape from a chroot environment by
			 * the new process.
			 */
			proc_fdunlock(p);
			return EPERM;
		}
	}

	/*
	 * If we are running with per-thread current working directories,
	 * inherit the new current working directory from the current thread.
	 */
	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_cdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
			/*
			 * we couldn't get a new reference on
			 * the current working directory being
			 * inherited... we might as well drop
			 * our reference from the parent also
			 * since the vnode has gone DEAD making
			 * it useless... by dropping it we'll
			 * be that much closer to recycling it
			 */
			vnode_rele(fdp->fd_cdir);
			fdp->fd_cdir = NULL;
		}
	}

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	afterlast = fdp->fd_afterlast;
	freefile = fdp->fd_freefile;
	if (afterlast <= NDFILE) {
		n_files = NDFILE;
	} else {
		n_files = roundup(afterlast, NDEXTENT);
	}

	/* Drop the lock across the (blocking) table allocation. */
	proc_fdunlock(p);

	ofiles = kheap_alloc(KM_OFILETABL, n_files * OFILESIZE,
	    Z_WAITOK | Z_ZERO);
	if (ofiles == NULL) {
		/* Undo the directory references taken above. */
		if (newfdp->fd_cdir) {
			vnode_rele(newfdp->fd_cdir);
			newfdp->fd_cdir = NULL;
		}
		if (newfdp->fd_rdir) {
			vnode_rele(newfdp->fd_rdir);
			newfdp->fd_rdir = NULL;
		}
		return ENOMEM;
	}
	/* The flags array lives immediately after the pointer array. */
	ofileflags = (char *)&ofiles[n_files];

	proc_fdlock(p);

	for (int i = afterlast; i-- > 0;) {
		struct fileproc *ofp, *nfp;
		char flags;

		ofp = fdp->fd_ofiles[i];
		flags = fdp->fd_ofileflags[i];

		if (ofp == NULL ||
		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
		    (ofp->fp_flags & FP_CLOFORK) ||
		    (flags & UF_RESERVED)) {
			/* Not inherited: tighten afterlast/freefile bounds. */
			if (i + 1 == afterlast) {
				afterlast = i;
			}
			if (i < freefile) {
				freefile = i;
			}

			continue;
		}

		assert(ofp->fp_guard_attrs == 0);
		nfp = fileproc_alloc_init();
		nfp->fp_glob = ofp->fp_glob;
		nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
		fg_ref(p, nfp->fp_glob);

		ofiles[i] = nfp;
#if CONFIG_PROC_RESOURCE_LIMITS
		fd_nfiles_open++;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	}

	proc_fdunlock(p);

	newfdp->fd_ofiles = ofiles;
	newfdp->fd_ofileflags = ofileflags;
	newfdp->fd_nfiles = n_files;
	newfdp->fd_afterlast = afterlast;
	newfdp->fd_freefile = freefile;

#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_open = fd_nfiles_open;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	return 0;
}
921 
/*
 * fdt_invalidate
 *
 * Description:	Tear down a process's fd table at exit time: drop all
 *		knotes and workloops, close every open descriptor, then
 *		detach and free the table itself, the per-process workq
 *		kqueue, the cwd/root directory references and the kqueue
 *		hash.  Frees resources only after the fdlock is dropped.
 */
void
fdt_invalidate(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, **ofiles;
	struct kqworkq *kqwq = NULL;
	vnode_t vn1 = NULL, vn2 = NULL;
	struct kqwllist *kqhash = NULL;
	u_long kqhashmask = 0;
	int n_files = 0;

	/*
	 * deallocate all the knotes up front and claim empty
	 * tables to make any subsequent kqueue closes faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/*
	 * dealloc all workloops that have outstanding retains
	 * when created with scheduling parameters.
	 */
	kqworkloops_dealloc(p);

	proc_fdlock(p);

	/* close file descriptors */
	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
		for (int i = fdp->fd_afterlast; i-- > 0;) {
			if ((fp = fdp->fd_ofiles[i]) != NULL) {
				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
					panic("fdfree: found fp with UF_RESERVED");
				}
				/* fp_close_and_unlock() drops the fdlock */
				fp_close_and_unlock(p, i, fp, 0);
				proc_fdlock(p);
			}
		}
	}

	/* Detach everything under the lock; free it after unlocking. */
	n_files = fdp->fd_nfiles;
	ofiles = fdp->fd_ofiles;
	kqwq = fdp->fd_wqkqueue;
	vn1 = fdp->fd_cdir;
	vn2 = fdp->fd_rdir;

	fdp->fd_ofileflags = NULL;
	fdp->fd_ofiles = NULL;
	fdp->fd_nfiles = 0;
	fdp->fd_wqkqueue = NULL;
	fdp->fd_cdir = NULL;
	fdp->fd_rdir = NULL;

	proc_fdunlock(p);

	lck_mtx_lock(&fdp->fd_knhashlock);

	kqhash = fdp->fd_kqhash;
	kqhashmask = fdp->fd_kqhashmask;

	fdp->fd_kqhash = 0;
	fdp->fd_kqhashmask = 0;

	lck_mtx_unlock(&fdp->fd_knhashlock);

	kheap_free(KM_OFILETABL, ofiles, n_files * OFILESIZE);

	if (kqwq) {
		kqworkq_dealloc(kqwq);
	}
	if (vn1) {
		vnode_rele(vn1);
	}
	if (vn2) {
		vnode_rele(vn2);
	}
	if (kqhash) {
		/* All knotes were dropped above; every bucket must be empty. */
		for (uint32_t i = 0; i <= kqhashmask; i++) {
			assert(LIST_EMPTY(&kqhash[i]));
		}
		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
	}
}
1005 
1006 
1007 struct fileproc *
fileproc_alloc_init(void)1008 fileproc_alloc_init(void)
1009 {
1010 	struct fileproc *fp;
1011 
1012 	fp = zalloc_flags(fp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1013 	os_ref_init(&fp->fp_iocount, &f_refgrp);
1014 	return fp;
1015 }
1016 
1017 
/*
 * Release the allocation reference on a fileproc and free it.  The
 * fp_iocount must drop to zero here (panic on DEVELOPMENT/DEBUG
 * kernels otherwise); any guard state is removed before the element
 * returns to the zone.
 */
void
fileproc_free(struct fileproc *fp)
{
	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
#if DEVELOPMENT || DEBUG
	if (0 != refc) {
		panic("%s: pid %d refc: %u != 0",
		    __func__, proc_pid(current_proc()), refc);
	}
#endif
	if (fp->fp_guard_attrs) {
		guarded_fileproc_unguard(fp);
	}
	assert(fp->fp_wset == NULL);
	zfree(fp_zone, fp);
}
1034 
1035 
1036 /*
1037  * Statistics counter for the number of times a process calling fdalloc()
1038  * has resulted in an expansion of the per process open file table.
1039  *
1040  * XXX This would likely be of more use if it were per process
1041  */
1042 int fdexpand;
1043 
1044 #if CONFIG_PROC_RESOURCE_LIMITS
1045 /*
1046  * Should be called only with the proc_fdlock held.
1047  */
1048 void
fd_check_limit_exceeded(struct filedesc * fdp)1049 fd_check_limit_exceeded(struct filedesc *fdp)
1050 {
1051 #if DIAGNOSTIC
1052 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1053 #endif
1054 	if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
1055 	    (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
1056 		fd_above_soft_limit_send_notification(fdp);
1057 		act_set_astproc_resource(current_thread());
1058 	} else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
1059 	    (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
1060 		fd_above_hard_limit_send_notification(fdp);
1061 		act_set_astproc_resource(current_thread());
1062 	}
1063 }
1064 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1065 
1066 /*
1067  * fdalloc
1068  *
1069  * Description:	Allocate a file descriptor for the process.
1070  *
1071  * Parameters:	p				Process to allocate the fd in
1072  *		want				The fd we would prefer to get
1073  *		result				Pointer to fd we got
1074  *
1075  * Returns:	0				Success
1076  *		EMFILE
1077  *		ENOMEM
1078  *
1079  * Implicit returns:
1080  *		*result (modified)		The fd which was allocated
1081  */
1082 int
fdalloc(proc_t p,int want,int * result)1083 fdalloc(proc_t p, int want, int *result)
1084 {
1085 	struct filedesc *fdp = &p->p_fd;
1086 	int i;
1087 	int last, numfiles, oldnfiles;
1088 	struct fileproc **newofiles, **ofiles;
1089 	char *newofileflags;
1090 	int lim = proc_limitgetcur_nofile(p);
1091 
1092 	/*
1093 	 * Search for a free descriptor starting at the higher
1094 	 * of want or fd_freefile.  If that fails, consider
1095 	 * expanding the ofile array.
1096 	 */
1097 #if DIAGNOSTIC
1098 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1099 #endif
1100 
1101 	for (;;) {
1102 		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
1103 		if ((i = want) < fdp->fd_freefile) {
1104 			i = fdp->fd_freefile;
1105 		}
1106 		for (; i < last; i++) {
1107 			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
1108 				procfdtbl_reservefd(p, i);
1109 				if (i >= fdp->fd_afterlast) {
1110 					fdp->fd_afterlast = i + 1;
1111 				}
1112 				if (want <= fdp->fd_freefile) {
1113 					fdp->fd_freefile = i;
1114 				}
1115 				*result = i;
1116 #if CONFIG_PROC_RESOURCE_LIMITS
1117 				fdp->fd_nfiles_open++;
1118 				fd_check_limit_exceeded(fdp);
1119 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1120 				return 0;
1121 			}
1122 		}
1123 
1124 		/*
1125 		 * No space in current array.  Expand?
1126 		 */
1127 		if ((rlim_t)fdp->fd_nfiles >= lim) {
1128 			return EMFILE;
1129 		}
1130 		if (fdp->fd_nfiles < NDEXTENT) {
1131 			numfiles = NDEXTENT;
1132 		} else {
1133 			numfiles = 2 * fdp->fd_nfiles;
1134 		}
1135 		/* Enforce lim */
1136 		if ((rlim_t)numfiles > lim) {
1137 			numfiles = (int)lim;
1138 		}
1139 		proc_fdunlock(p);
1140 		newofiles = kheap_alloc(KM_OFILETABL, numfiles * OFILESIZE,
1141 		    Z_WAITOK);
1142 		proc_fdlock(p);
1143 		if (newofiles == NULL) {
1144 			return ENOMEM;
1145 		}
1146 		if (fdp->fd_nfiles >= numfiles) {
1147 			kheap_free(KM_OFILETABL, newofiles, numfiles * OFILESIZE);
1148 			continue;
1149 		}
1150 		newofileflags = (char *) &newofiles[numfiles];
1151 		/*
1152 		 * Copy the existing ofile and ofileflags arrays
1153 		 * and zero the new portion of each array.
1154 		 */
1155 		oldnfiles = fdp->fd_nfiles;
1156 		(void) memcpy(newofiles, fdp->fd_ofiles,
1157 		    oldnfiles * sizeof(*fdp->fd_ofiles));
1158 		(void) memset(&newofiles[oldnfiles], 0,
1159 		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
1160 
1161 		(void) memcpy(newofileflags, fdp->fd_ofileflags,
1162 		    oldnfiles * sizeof(*fdp->fd_ofileflags));
1163 		(void) memset(&newofileflags[oldnfiles], 0,
1164 		    (numfiles - oldnfiles) *
1165 		    sizeof(*fdp->fd_ofileflags));
1166 		ofiles = fdp->fd_ofiles;
1167 		fdp->fd_ofiles = newofiles;
1168 		fdp->fd_ofileflags = newofileflags;
1169 		fdp->fd_nfiles = numfiles;
1170 		kheap_free(KM_OFILETABL, ofiles, oldnfiles * OFILESIZE);
1171 		fdexpand++;
1172 	}
1173 }
1174 
1175 
1176 #pragma mark fileprocs
1177 
1178 void
fileproc_modify_vflags(struct fileproc * fp,fileproc_vflags_t vflags,boolean_t clearflags)1179 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1180 {
1181 	if (clearflags) {
1182 		os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1183 	} else {
1184 		os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1185 	}
1186 }
1187 
1188 fileproc_vflags_t
fileproc_get_vflags(struct fileproc * fp)1189 fileproc_get_vflags(struct fileproc *fp)
1190 {
1191 	return os_atomic_load(&fp->fp_vflags, relaxed);
1192 }
1193 
1194 /*
1195  * falloc_withinit
1196  *
1197  * Create a new open file structure and allocate
1198  * a file descriptor for the process that refers to it.
1199  *
1200  * Returns:	0			Success
1201  *
1202  * Description:	Allocate an entry in the per process open file table and
1203  *		return the corresponding fileproc and fd.
1204  *
1205  * Parameters:	p				The process in whose open file
1206  *						table the fd is to be allocated
1207  *		resultfp			Pointer to fileproc pointer
1208  *						return area
1209  *		resultfd			Pointer to fd return area
1210  *		ctx				VFS context
1211  *		fp_zalloc			fileproc allocator to use
1212  *		crarg				allocator args
1213  *
1214  * Returns:	0				Success
1215  *		ENFILE				Too many open files in system
1216  *		fdalloc:EMFILE			Too many open files in process
1217  *		fdalloc:ENOMEM			M_OFILETABL zone exhausted
1218  *		ENOMEM				fp_zone or fg_zone zone
1219  *						exhausted
1220  *
1221  * Implicit returns:
1222  *		*resultfd (modified)		Returned fileproc pointer
1223  *		*resultfd (modified)		Returned fd
1224  *
1225  * Notes:	This function takes separate process and context arguments
1226  *		solely to support kern_exec.c; otherwise, it would take
1227  *		neither, and use the vfs_context_current() routine internally.
1228  */
1229 int
falloc_withinit(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx,fp_initfn_t fp_init,void * initarg)1230 falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
1231     vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
1232 {
1233 	struct fileproc *fp;
1234 	struct fileglob *fg;
1235 	int error, nfd;
1236 #if CONFIG_MACF
1237 	kauth_cred_t cred;
1238 #endif
1239 
1240 	/* Make sure we don't go beyond the system-wide limit */
1241 	if (nfiles >= maxfiles) {
1242 		tablefull("file");
1243 		return ENFILE;
1244 	}
1245 
1246 	proc_fdlock(p);
1247 
1248 	/* fdalloc will make sure the process stays below per-process limit */
1249 	if ((error = fdalloc(p, 0, &nfd))) {
1250 		proc_fdunlock(p);
1251 		return error;
1252 	}
1253 
1254 #if CONFIG_MACF
1255 	cred = kauth_cred_proc_ref(p);
1256 	error = mac_file_check_create(cred);
1257 	kauth_cred_unref(&cred);
1258 	if (error) {
1259 		proc_fdunlock(p);
1260 		return error;
1261 	}
1262 #endif
1263 
1264 	/*
1265 	 * Allocate a new file descriptor.
1266 	 * If the process has file descriptor zero open, add to the list
1267 	 * of open files at that point, otherwise put it at the front of
1268 	 * the list of open files.
1269 	 */
1270 	proc_fdunlock(p);
1271 
1272 	fp = fileproc_alloc_init();
1273 	if (fp_init) {
1274 		fp_init(fp, initarg);
1275 	}
1276 
1277 	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
1278 	lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
1279 
1280 	os_ref_retain_locked(&fp->fp_iocount);
1281 	os_ref_init_raw(&fg->fg_count, &f_refgrp);
1282 	fg->fg_ops = &uninitops;
1283 	fp->fp_glob = fg;
1284 
1285 	kauth_cred_ref(ctx->vc_ucred);
1286 
1287 	fp->f_cred = ctx->vc_ucred;
1288 
1289 	os_atomic_inc(&nfiles, relaxed);
1290 
1291 	proc_fdlock(p);
1292 
1293 	p->p_fd.fd_ofiles[nfd] = fp;
1294 
1295 	proc_fdunlock(p);
1296 
1297 	if (resultfp) {
1298 		*resultfp = fp;
1299 	}
1300 	if (resultfd) {
1301 		*resultfd = nfd;
1302 	}
1303 
1304 	return 0;
1305 }
1306 
/*
 * falloc
 *
 * Description:	Convenience wrapper around falloc_withinit() for callers
 *		that need no custom fileproc initialization callback.
 */
int
falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
{
	return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
}
1312 
1313 
1314 /*
1315  * fp_free
1316  *
1317  * Description:	Release the fd and free the fileproc associated with the fd
1318  *		in the per process open file table of the specified process;
1319  *		these values must correspond.
1320  *
1321  * Parameters:	p				Process containing fd
1322  *		fd				fd to be released
1323  *		fp				fileproc to be freed
1324  */
1325 void
fp_free(proc_t p,int fd,struct fileproc * fp)1326 fp_free(proc_t p, int fd, struct fileproc * fp)
1327 {
1328 	proc_fdlock_spin(p);
1329 	fdrelse(p, fd);
1330 	proc_fdunlock(p);
1331 
1332 	fg_free(fp->fp_glob);
1333 	os_ref_release_live(&fp->fp_iocount);
1334 	fileproc_free(fp);
1335 }
1336 
1337 
1338 struct fileproc *
fp_get_noref_locked(proc_t p,int fd)1339 fp_get_noref_locked(proc_t p, int fd)
1340 {
1341 	struct filedesc *fdp = &p->p_fd;
1342 	struct fileproc *fp;
1343 
1344 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1345 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1346 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1347 		return NULL;
1348 	}
1349 
1350 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1351 	return fp;
1352 }
1353 
1354 struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p,int fd)1355 fp_get_noref_locked_with_iocount(proc_t p, int fd)
1356 {
1357 	struct filedesc *fdp = &p->p_fd;
1358 	struct fileproc *fp = NULL;
1359 
1360 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1361 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1362 	    os_ref_get_count(&fp->fp_iocount) <= 1 ||
1363 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1364 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
1365 		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
1366 		    __func__, fd, fp);
1367 	}
1368 
1369 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1370 	return fp;
1371 }
1372 
1373 
1374 /*
1375  * fp_lookup
1376  *
1377  * Description:	Get fileproc pointer for a given fd from the per process
1378  *		open file table of the specified process and if successful,
1379  *		increment the fp_iocount
1380  *
1381  * Parameters:	p				Process in which fd lives
1382  *		fd				fd to get information for
1383  *		resultfp			Pointer to result fileproc
1384  *						pointer area, or 0 if none
1385  *		locked				!0 if the caller holds the
1386  *						proc_fdlock, 0 otherwise
1387  *
1388  * Returns:	0			Success
1389  *		EBADF			Bad file descriptor
1390  *
1391  * Implicit returns:
1392  *		*resultfp (modified)		Fileproc pointer
1393  *
1394  * Locks:	If the argument 'locked' is non-zero, then the caller is
1395  *		expected to have taken and held the proc_fdlock; if it is
1396  *		zero, than this routine internally takes and drops this lock.
1397  */
1398 int
fp_lookup(proc_t p,int fd,struct fileproc ** resultfp,int locked)1399 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1400 {
1401 	struct filedesc *fdp = &p->p_fd;
1402 	struct fileproc *fp;
1403 
1404 	if (!locked) {
1405 		proc_fdlock_spin(p);
1406 	}
1407 	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1408 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1409 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1410 		if (!locked) {
1411 			proc_fdunlock(p);
1412 		}
1413 		return EBADF;
1414 	}
1415 
1416 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1417 	os_ref_retain_locked(&fp->fp_iocount);
1418 
1419 	if (resultfp) {
1420 		*resultfp = fp;
1421 	}
1422 	if (!locked) {
1423 		proc_fdunlock(p);
1424 	}
1425 
1426 	return 0;
1427 }
1428 
1429 
1430 int
fp_get_ftype(proc_t p,int fd,file_type_t ftype,int err,struct fileproc ** fpp)1431 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1432 {
1433 	struct filedesc *fdp = &p->p_fd;
1434 	struct fileproc *fp;
1435 
1436 	proc_fdlock_spin(p);
1437 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1438 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1439 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1440 		proc_fdunlock(p);
1441 		return EBADF;
1442 	}
1443 
1444 	if (fp->f_type != ftype) {
1445 		proc_fdunlock(p);
1446 		return err;
1447 	}
1448 
1449 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1450 	os_ref_retain_locked(&fp->fp_iocount);
1451 	proc_fdunlock(p);
1452 
1453 	*fpp = fp;
1454 	return 0;
1455 }
1456 
1457 
1458 /*
1459  * fp_drop
1460  *
1461  * Description:	Drop the I/O reference previously taken by calling fp_lookup
1462  *		et. al.
1463  *
1464  * Parameters:	p				Process in which the fd lives
1465  *		fd				fd associated with the fileproc
1466  *		fp				fileproc on which to set the
1467  *						flag and drop the reference
1468  *		locked				flag to internally take and
1469  *						drop proc_fdlock if it is not
1470  *						already held by the caller
1471  *
1472  * Returns:	0				Success
1473  *		EBADF				Bad file descriptor
1474  *
1475  * Locks:	This function internally takes and drops the proc_fdlock for
1476  *		the supplied process if 'locked' is non-zero, and assumes that
1477  *		the caller already holds this lock if 'locked' is non-zero.
1478  *
1479  * Notes:	The fileproc must correspond to the fd in the supplied proc
1480  */
1481 int
fp_drop(proc_t p,int fd,struct fileproc * fp,int locked)1482 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
1483 {
1484 	struct filedesc *fdp = &p->p_fd;
1485 	int     needwakeup = 0;
1486 
1487 	if (!locked) {
1488 		proc_fdlock_spin(p);
1489 	}
1490 	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
1491 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1492 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1493 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
1494 		if (!locked) {
1495 			proc_fdunlock(p);
1496 		}
1497 		return EBADF;
1498 	}
1499 
1500 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
1501 		if (fp->fp_flags & FP_SELCONFLICT) {
1502 			fp->fp_flags &= ~FP_SELCONFLICT;
1503 		}
1504 
1505 		if (fdp->fd_fpdrainwait) {
1506 			fdp->fd_fpdrainwait = 0;
1507 			needwakeup = 1;
1508 		}
1509 	}
1510 	if (!locked) {
1511 		proc_fdunlock(p);
1512 	}
1513 	if (needwakeup) {
1514 		wakeup(&fdp->fd_fpdrainwait);
1515 	}
1516 
1517 	return 0;
1518 }
1519 
1520 
1521 /*
1522  * fileproc_drain
1523  *
1524  * Description:	Drain out pending I/O operations
1525  *
1526  * Parameters:	p				Process closing this file
1527  *		fp				fileproc struct for the open
1528  *						instance on the file
1529  *
1530  * Returns:	void
1531  *
1532  * Locks:	Assumes the caller holds the proc_fdlock
1533  *
1534  * Notes:	For character devices, this occurs on the last close of the
1535  *		device; for all other file descriptors, this occurs on each
1536  *		close to prevent fd's from being closed out from under
1537  *		operations currently in progress and blocked
1538  *
1539  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
1540  *		regarding their use and interaction with this function.
1541  */
1542 static void
fileproc_drain(proc_t p,struct fileproc * fp)1543 fileproc_drain(proc_t p, struct fileproc * fp)
1544 {
1545 	struct filedesc *fdp = &p->p_fd;
1546 	struct vfs_context context;
1547 	thread_t thread;
1548 	bool is_current_proc;
1549 
1550 	is_current_proc = (p == current_proc());
1551 
1552 	if (!is_current_proc) {
1553 		proc_lock(p);
1554 		thread = proc_thread(p); /* XXX */
1555 		thread_reference(thread);
1556 		proc_unlock(p);
1557 	} else {
1558 		thread = current_thread();
1559 	}
1560 
1561 	context.vc_thread = thread;
1562 	context.vc_ucred = fp->fp_glob->fg_cred;
1563 
1564 	/* Set the vflag for drain */
1565 	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
1566 
1567 	while (os_ref_get_count(&fp->fp_iocount) > 1) {
1568 		lck_mtx_convert_spin(&fdp->fd_lock);
1569 
1570 		fo_drain(fp, &context);
1571 		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
1572 			struct waitq_set *wqset;
1573 
1574 			if (fp->fp_guard_attrs) {
1575 				wqset = fp->fp_guard->fpg_wset;
1576 			} else {
1577 				wqset = fp->fp_wset;
1578 			}
1579 			if (waitq_wakeup64_all((struct waitq *)wqset, NO_EVENT64,
1580 			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
1581 				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
1582 				    wqset, fp->fp_guard_attrs ? "guarded " : "", fp);
1583 			}
1584 		}
1585 		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1586 			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
1587 			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
1588 				panic("bad select_conflict_queue");
1589 			}
1590 		}
1591 		fdp->fd_fpdrainwait = 1;
1592 		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
1593 	}
1594 #if DIAGNOSTIC
1595 	if ((fp->fp_flags & FP_INSELECT) != 0) {
1596 		panic("FP_INSELECT set on drained fp");
1597 	}
1598 #endif
1599 	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1600 		fp->fp_flags &= ~FP_SELCONFLICT;
1601 	}
1602 
1603 	if (!is_current_proc) {
1604 		thread_deallocate(thread);
1605 	}
1606 }
1607 
1608 
/*
 * fp_close_and_unlock
 *
 * Description:	Close the open file instance `fp` known to occupy fd `fd`
 *		in process `p`, drain outstanding I/O, release (or, for
 *		dup2, keep reserved) the descriptor slot, and drop the
 *		final fileglob reference.
 *
 * Parameters:	p		Process containing the fd
 *		fd		fd being closed
 *		fp		fileproc for that fd
 *		flags		FD_DUP2RESV to leave the slot reserved
 *
 * Returns:	value of fg_drop() on the underlying fileglob
 *
 * Locks:	Called with the proc_fdlock held; the lock is dropped
 *		before returning.
 */
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileglob *fg = fp->fp_glob;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/* Close-notification callouts and AIO cancellation may block, so
	 * they run with the fdlock dropped. */
	if ((fp->fp_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
		proc_fdunlock(p);

		if ((FILEGLOB_DTYPE(fg) == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			if (vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
				u_int   fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);
#if CONFIG_MACF
				cred = kauth_cred_proc_ref(p);
				mac_file_notify_close(cred, fp->fp_glob);
				kauth_cred_unref(&cred);
#endif
				vnode_put((vnode_t)fg_get_data(fg));
			}
		}
		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes watching this fd. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	/* Wait for all outstanding I/O references on fp to drain. */
	fileproc_drain(p, fp);

	if (flags & FD_DUP2RESV) {
		/* dup2() wants the slot back, still reserved for its own use. */
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
	}

	/* Free the fileproc; the fileglob's fate is decided by fg_drop(). */
	fileproc_free(fp);

	return fg_drop(p, fg);
}
1706 
1707 
1708 /*
1709  * dupfdopen
1710  *
1711  * Description:	Duplicate the specified descriptor to a free descriptor;
1712  *		this is the second half of fdopen(), above.
1713  *
1714  * Parameters:	p				current process pointer
1715  *		indx				fd to dup to
1716  *		dfd				fd to dup from
1717  *		mode				mode to set on new fd
1718  *		error				command code
1719  *
1720  * Returns:	0				Success
1721  *		EBADF				Source fd is bad
1722  *		EACCES				Requested mode not allowed
1723  *		!0				'error', if not ENODEV or
1724  *						ENXIO
1725  *
1726  * Notes:	XXX This is not thread safe; see fdopen() above
1727  */
1728 int
dupfdopen(proc_t p,int indx,int dfd,int flags,int error)1729 dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
1730 {
1731 	struct filedesc *fdp = &p->p_fd;
1732 	struct fileproc *wfp;
1733 	struct fileproc *fp;
1734 #if CONFIG_MACF
1735 	int myerror;
1736 #endif
1737 
1738 	/*
1739 	 * If the to-be-dup'd fd number is greater than the allowed number
1740 	 * of file descriptors, or the fd to be dup'd has already been
1741 	 * closed, reject.  Note, check for new == old is necessary as
1742 	 * falloc could allocate an already closed to-be-dup'd descriptor
1743 	 * as the new descriptor.
1744 	 */
1745 	proc_fdlock(p);
1746 
1747 	fp = fdp->fd_ofiles[indx];
1748 	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
1749 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
1750 	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
1751 		proc_fdunlock(p);
1752 		return EBADF;
1753 	}
1754 #if CONFIG_MACF
1755 	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
1756 	if (myerror) {
1757 		proc_fdunlock(p);
1758 		return myerror;
1759 	}
1760 #endif
1761 	/*
1762 	 * There are two cases of interest here.
1763 	 *
1764 	 * For ENODEV simply dup (dfd) to file descriptor
1765 	 * (indx) and return.
1766 	 *
1767 	 * For ENXIO steal away the file structure from (dfd) and
1768 	 * store it in (indx).  (dfd) is effectively closed by
1769 	 * this operation.
1770 	 *
1771 	 * Any other error code is just returned.
1772 	 */
1773 	switch (error) {
1774 	case ENODEV:
1775 		if (fp_isguarded(wfp, GUARD_DUP)) {
1776 			proc_fdunlock(p);
1777 			return EPERM;
1778 		}
1779 
1780 		/*
1781 		 * Check that the mode the file is being opened for is a
1782 		 * subset of the mode of the existing descriptor.
1783 		 */
1784 		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1785 			proc_fdunlock(p);
1786 			return EACCES;
1787 		}
1788 		if (indx >= fdp->fd_afterlast) {
1789 			fdp->fd_afterlast = indx + 1;
1790 		}
1791 
1792 		if (fp->fp_glob) {
1793 			fg_free(fp->fp_glob);
1794 		}
1795 		fg_ref(p, wfp->fp_glob);
1796 		fp->fp_glob = wfp->fp_glob;
1797 		/*
1798 		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
1799 		 * unlike dup(), dup2() or fcntl(F_DUPFD).
1800 		 *
1801 		 * open1() already handled O_CLO{EXEC,FORK}
1802 		 */
1803 		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));
1804 
1805 		procfdtbl_releasefd(p, indx, NULL);
1806 		fp_drop(p, indx, fp, 1);
1807 		proc_fdunlock(p);
1808 		return 0;
1809 
1810 	default:
1811 		proc_fdunlock(p);
1812 		return error;
1813 	}
1814 	/* NOTREACHED */
1815 }
1816 
1817 
1818 #pragma mark KPIS (sys/file.h)
1819 
1820 /*
1821  * fg_get_vnode
1822  *
1823  * Description:	Return vnode associated with the file structure, if
1824  *		any.  The lifetime of the returned vnode is bound to
1825  *		the lifetime of the file structure.
1826  *
1827  * Parameters:	fg				Pointer to fileglob to
1828  *						inspect
1829  *
1830  * Returns:	vnode_t
1831  */
1832 vnode_t
fg_get_vnode(struct fileglob * fg)1833 fg_get_vnode(struct fileglob *fg)
1834 {
1835 	if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1836 		return (vnode_t)fg_get_data(fg);
1837 	} else {
1838 		return NULL;
1839 	}
1840 }
1841 
1842 
1843 /*
1844  * fp_getfvp
1845  *
1846  * Description:	Get fileproc and vnode pointer for a given fd from the per
1847  *		process open file table of the specified process, and if
1848  *		successful, increment the fp_iocount
1849  *
1850  * Parameters:	p				Process in which fd lives
1851  *		fd				fd to get information for
1852  *		resultfp			Pointer to result fileproc
1853  *						pointer area, or 0 if none
1854  *		resultvp			Pointer to result vnode pointer
1855  *						area, or 0 if none
1856  *
1857  * Returns:	0				Success
1858  *		EBADF				Bad file descriptor
1859  *		ENOTSUP				fd does not refer to a vnode
1860  *
1861  * Implicit returns:
1862  *		*resultfp (modified)		Fileproc pointer
1863  *		*resultvp (modified)		vnode pointer
1864  *
1865  * Notes:	The resultfp and resultvp fields are optional, and may be
1866  *		independently specified as NULL to skip returning information
1867  *
1868  * Locks:	Internally takes and releases proc_fdlock
1869  */
1870 int
fp_getfvp(proc_t p,int fd,struct fileproc ** resultfp,struct vnode ** resultvp)1871 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1872 {
1873 	struct fileproc *fp;
1874 	int error;
1875 
1876 	error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1877 	if (error == 0) {
1878 		if (resultfp) {
1879 			*resultfp = fp;
1880 		}
1881 		if (resultvp) {
1882 			*resultvp = (struct vnode *)fp_get_data(fp);
1883 		}
1884 	}
1885 
1886 	return error;
1887 }
1888 
1889 
1890 /*
1891  * fp_get_pipe_id
1892  *
1893  * Description:	Get pipe id for a given fd from the per process open file table
1894  *		of the specified process.
1895  *
1896  * Parameters:	p				Process in which fd lives
1897  *		fd				fd to get information for
1898  *		result_pipe_id			Pointer to result pipe id
1899  *
1900  * Returns:	0				Success
1901  *		EIVAL				NULL pointer arguments passed
1902  *		fp_lookup:EBADF			Bad file descriptor
1903  *		ENOTSUP				fd does not refer to a pipe
1904  *
1905  * Implicit returns:
1906  *		*result_pipe_id (modified)	pipe id
1907  *
1908  * Locks:	Internally takes and releases proc_fdlock
1909  */
1910 int
fp_get_pipe_id(proc_t p,int fd,uint64_t * result_pipe_id)1911 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1912 {
1913 	struct fileproc *fp = FILEPROC_NULL;
1914 	struct fileglob *fg = NULL;
1915 	int error = 0;
1916 
1917 	if (p == NULL || result_pipe_id == NULL) {
1918 		return EINVAL;
1919 	}
1920 
1921 	proc_fdlock(p);
1922 	if ((error = fp_lookup(p, fd, &fp, 1))) {
1923 		proc_fdunlock(p);
1924 		return error;
1925 	}
1926 	fg = fp->fp_glob;
1927 
1928 	if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
1929 		*result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
1930 	} else {
1931 		error = ENOTSUP;
1932 	}
1933 
1934 	fp_drop(p, fd, fp, 1);
1935 	proc_fdunlock(p);
1936 	return error;
1937 }
1938 
1939 
1940 /*
1941  * file_vnode
1942  *
1943  * Description:	Given an fd, look it up in the current process's per process
1944  *		open file table, and return its internal vnode pointer.
1945  *
1946  * Parameters:	fd				fd to obtain vnode from
1947  *		vpp				pointer to vnode return area
1948  *
1949  * Returns:	0				Success
1950  *		EINVAL				The fd does not refer to a
1951  *						vnode fileproc entry
1952  *	fp_lookup:EBADF				Bad file descriptor
1953  *
1954  * Implicit returns:
1955  *		*vpp (modified)			Returned vnode pointer
1956  *
1957  * Locks:	This function internally takes and drops the proc_fdlock for
1958  *		the current process
1959  *
1960  * Notes:	If successful, this function increments the fp_iocount on the
1961  *		fd's corresponding fileproc.
1962  *
1963  *		The fileproc referenced is not returned; because of this, care
1964  *		must be taken to not drop the last reference (e.g. by closing
1965  *		the file).  This is inherently unsafe, since the reference may
1966  *		not be recoverable from the vnode, if there is a subsequent
1967  *		close that destroys the associate fileproc.  The caller should
1968  *		therefore retain their own reference on the fileproc so that
1969  *		the fp_iocount can be dropped subsequently.  Failure to do this
1970  *		can result in the returned pointer immediately becoming invalid
1971  *		following the call.
1972  *
1973  *		Use of this function is discouraged.
1974  */
/* Thin wrapper per the block comment above: same lookup, vid discarded. */
int
file_vnode(int fd, struct vnode **vpp)
{
	return file_vnode_withvid(fd, vpp, NULL);
}
1980 
1981 
1982 /*
1983  * file_vnode_withvid
1984  *
1985  * Description:	Given an fd, look it up in the current process's per process
1986  *		open file table, and return its internal vnode pointer.
1987  *
1988  * Parameters:	fd				fd to obtain vnode from
1989  *		vpp				pointer to vnode return area
1990  *		vidp				pointer to vid of the returned vnode
1991  *
1992  * Returns:	0				Success
1993  *		EINVAL				The fd does not refer to a
1994  *						vnode fileproc entry
1995  *	fp_lookup:EBADF				Bad file descriptor
1996  *
1997  * Implicit returns:
1998  *		*vpp (modified)			Returned vnode pointer
1999  *
2000  * Locks:	This function internally takes and drops the proc_fdlock for
2001  *		the current process
2002  *
2003  * Notes:	If successful, this function increments the fp_iocount on the
2004  *		fd's corresponding fileproc.
2005  *
2006  *		The fileproc referenced is not returned; because of this, care
2007  *		must be taken to not drop the last reference (e.g. by closing
2008  *		the file).  This is inherently unsafe, since the reference may
2009  *		not be recoverable from the vnode, if there is a subsequent
2010  *		close that destroys the associate fileproc.  The caller should
2011  *		therefore retain their own reference on the fileproc so that
2012  *		the fp_iocount can be dropped subsequently.  Failure to do this
2013  *		can result in the returned pointer immediately becoming invalid
2014  *		following the call.
2015  *
2016  *		Use of this function is discouraged.
2017  */
2018 int
file_vnode_withvid(int fd,struct vnode ** vpp,uint32_t * vidp)2019 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2020 {
2021 	struct fileproc *fp;
2022 	int error;
2023 
2024 	error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2025 	if (error == 0) {
2026 		if (vpp) {
2027 			*vpp = (struct vnode *)fp_get_data(fp);
2028 		}
2029 		if (vidp) {
2030 			*vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2031 		}
2032 	}
2033 	return error;
2034 }
2035 
2036 /*
2037  * file_socket
2038  *
2039  * Description:	Given an fd, look it up in the current process's per process
2040  *		open file table, and return its internal socket pointer.
2041  *
2042  * Parameters:	fd				fd to obtain vnode from
2043  *		sp				pointer to socket return area
2044  *
2045  * Returns:	0				Success
2046  *		ENOTSOCK			Not a socket
2047  *		fp_lookup:EBADF			Bad file descriptor
2048  *
2049  * Implicit returns:
2050  *		*sp (modified)			Returned socket pointer
2051  *
2052  * Locks:	This function internally takes and drops the proc_fdlock for
2053  *		the current process
2054  *
2055  * Notes:	If successful, this function increments the fp_iocount on the
2056  *		fd's corresponding fileproc.
2057  *
2058  *		The fileproc referenced is not returned; because of this, care
2059  *		must be taken to not drop the last reference (e.g. by closing
2060  *		the file).  This is inherently unsafe, since the reference may
2061  *		not be recoverable from the socket, if there is a subsequent
2062  *		close that destroys the associate fileproc.  The caller should
2063  *		therefore retain their own reference on the fileproc so that
2064  *		the fp_iocount can be dropped subsequently.  Failure to do this
2065  *		can result in the returned pointer immediately becoming invalid
2066  *		following the call.
2067  *
2068  *		Use of this function is discouraged.
2069  */
2070 int
file_socket(int fd,struct socket ** sp)2071 file_socket(int fd, struct socket **sp)
2072 {
2073 	struct fileproc *fp;
2074 	int error;
2075 
2076 	error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2077 	if (error == 0) {
2078 		if (sp) {
2079 			*sp = (struct socket *)fp_get_data(fp);
2080 		}
2081 	}
2082 	return error;
2083 }
2084 
2085 
2086 /*
2087  * file_flags
2088  *
2089  * Description:	Given an fd, look it up in the current process's per process
2090  *		open file table, and return its fileproc's flags field.
2091  *
2092  * Parameters:	fd				fd whose flags are to be
2093  *						retrieved
2094  *		flags				pointer to flags data area
2095  *
2096  * Returns:	0				Success
2097  *		ENOTSOCK			Not a socket
2098  *		fp_lookup:EBADF			Bad file descriptor
2099  *
2100  * Implicit returns:
2101  *		*flags (modified)		Returned flags field
2102  *
2103  * Locks:	This function internally takes and drops the proc_fdlock for
2104  *		the current process
2105  */
2106 int
file_flags(int fd,int * flags)2107 file_flags(int fd, int *flags)
2108 {
2109 	proc_t p = current_proc();
2110 	struct fileproc *fp;
2111 	int error = EBADF;
2112 
2113 	proc_fdlock_spin(p);
2114 	fp = fp_get_noref_locked(p, fd);
2115 	if (fp) {
2116 		*flags = (int)fp->f_flag;
2117 		error = 0;
2118 	}
2119 	proc_fdunlock(p);
2120 
2121 	return error;
2122 }
2123 
2124 
2125 /*
2126  * file_drop
2127  *
2128  * Description:	Drop an iocount reference on an fd, and wake up any waiters
2129  *		for draining (i.e. blocked in fileproc_drain() called during
2130  *		the last attempt to close a file).
2131  *
2132  * Parameters:	fd				fd on which an ioreference is
2133  *						to be dropped
2134  *
2135  * Returns:	0				Success
2136  *
2137  * Description:	Given an fd, look it up in the current process's per process
2138  *		open file table, and drop it's fileproc's fp_iocount by one
2139  *
2140  * Notes:	This is intended as a corresponding operation to the functions
2141  *		file_vnode() and file_socket() operations.
2142  *
2143  *		If the caller can't possibly hold an I/O reference,
2144  *		this function will panic the kernel rather than allowing
2145  *		for memory corruption. Callers should always call this
2146  *		because they acquired an I/O reference on this file before.
2147  *
2148  *		Use of this function is discouraged.
2149  */
2150 int
file_drop(int fd)2151 file_drop(int fd)
2152 {
2153 	struct fileproc *fp;
2154 	proc_t p = current_proc();
2155 	struct filedesc *fdp = &p->p_fd;
2156 	int     needwakeup = 0;
2157 
2158 	proc_fdlock_spin(p);
2159 	fp = fp_get_noref_locked_with_iocount(p, fd);
2160 
2161 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
2162 		if (fp->fp_flags & FP_SELCONFLICT) {
2163 			fp->fp_flags &= ~FP_SELCONFLICT;
2164 		}
2165 
2166 		if (fdp->fd_fpdrainwait) {
2167 			fdp->fd_fpdrainwait = 0;
2168 			needwakeup = 1;
2169 		}
2170 	}
2171 	proc_fdunlock(p);
2172 
2173 	if (needwakeup) {
2174 		wakeup(&fdp->fd_fpdrainwait);
2175 	}
2176 	return 0;
2177 }
2178 
2179 
/*
 * fd_rdwr
 *
 * Description:	Perform a kernel-driven read or write on an open fd, able
 *		to target either a user or a kernel buffer depending on
 *		segflg.
 *
 * Parameters:	fd		descriptor to transfer on
 *		rw		UIO_READ or UIO_WRITE
 *		base		buffer address (interpreted per segflg)
 *		len		number of bytes to transfer
 *		segflg		address space that `base` lives in
 *		offset		file offset for the transfer
 *		io_flg		IO_APPEND suppresses FOF_OFFSET (the fobj
 *				then uses its own notion of offset)
 *		aresid		if non-NULL receives the residual count;
 *				if NULL, a short transfer becomes EIO
 *
 * Returns:	0		Success
 *		EINVAL		fd is not a vnode, pipe, or socket
 *		EBADF		fd not open for the requested direction
 *		EIO		short transfer with aresid == NULL
 *	fp_lookup:EBADF		Bad file descriptor
 *	fo_read/fo_write:???
 */
int
fd_rdwr(
	int fd,
	enum uio_rw rw,
	uint64_t base,
	int64_t len,
	enum uio_seg segflg,
	off_t   offset,
	int     io_flg,
	int64_t *aresid)
{
	struct fileproc *fp;
	proc_t  p;
	int error = 0;
	int flags = 0;
	int spacetype;
	uio_t auio = NULL;
	uio_stackbuf_t uio_buf[UIO_SIZEOF(1)];
	/* Copy the current context so vc_ucred can be swapped below. */
	struct vfs_context context = *(vfs_context_current());

	p = current_proc();

	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		return error;
	}

	/* Only vnodes, pipes and sockets support this transfer path. */
	switch (FILEGLOB_DTYPE(fp->fp_glob)) {
	case DTYPE_VNODE:
	case DTYPE_PIPE:
	case DTYPE_SOCKET:
		break;
	default:
		error = EINVAL;
		goto out;
	}
	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
		error = EBADF;
		goto out;
	}

	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
		error = EBADF;
		goto out;
	}

	/* The I/O is performed with the credential the file was opened with. */
	context.vc_ucred = fp->fp_glob->fg_cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	} else {
		spacetype = UIO_SYSSPACE;
	}

	/* Single-iovec uio built in the on-stack buffer above. */
	auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));

	uio_addiov(auio, (user_addr_t)base, (user_size_t)len);

	/* Without IO_APPEND, force the explicit offset passed in. */
	if (!(io_flg & IO_APPEND)) {
		flags = FOF_OFFSET;
	}

	if (rw == UIO_WRITE) {
		user_ssize_t orig_resid = uio_resid(auio);
		error = fo_write(fp, auio, flags, &context);
		/* Any byte actually written marks the fileglob FWASWRITTEN. */
		if (uio_resid(auio) < orig_resid) {
			os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
		}
	} else {
		error = fo_read(fp, auio, flags, &context);
	}

	if (aresid) {
		*aresid = uio_resid(auio);
	} else if (uio_resid(auio) && error == 0) {
		/* Caller did not ask for the residual: short I/O is an error. */
		error = EIO;
	}
out:
	fp_drop(p, fd, fp, 0);
	return error;
}
2261 
2262 
#pragma mark syscalls

/*
 * Fallback definitions for filesystem-specific fcntl/ioctl selectors,
 * used when the owning filesystem's headers are not visible here.
 */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
#endif
2276 
/*
 * Evaluates to 1 when the signed 64-bit addition (x) + (y) would overflow
 * or underflow, 0 otherwise.  NOTE: both arguments are evaluated more
 * than once, so they must be side-effect free.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	        (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
	        (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
	        ? 1 : 0)
2281 
2282 /*
2283  * sys_getdtablesize
2284  *
2285  * Description:	Returns the per process maximum size of the descriptor table
2286  *
2287  * Parameters:	p				Process being queried
2288  *		retval				Pointer to the call return area
2289  *
2290  * Returns:	0				Success
2291  *
2292  * Implicit returns:
2293  *		*retval (modified)		Size of dtable
2294  */
2295 int
sys_getdtablesize(proc_t p,__unused struct getdtablesize_args * uap,int32_t * retval)2296 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2297 {
2298 	*retval = proc_limitgetcur_nofile(p);
2299 	return 0;
2300 }
2301 
2302 
/*
 * check_file_seek_range
 *
 * Description: Checks that the byte range described by a flock structure,
 *		once resolved against the current file offset, lies within
 *		[0, LLONG_MAX].
 *
 * Parameters:  fl		Flock structure.
 *		cur_file_offset	Current offset in the file.
 *
 * Returns:     0               on Success.
 *		EOVERFLOW	on overflow.
 *		EINVAL          on offset less than zero.
 */

/*
 * Type-checked, single-evaluation replacement for the
 * CHECK_ADD_OVERFLOW_INT64L() macro: returns 1 when x + y cannot be
 * represented as a signed 64-bit value, 0 otherwise.
 */
static int
int64_add_overflows(long long x, long long y)
{
	if (x > 0 && y > 0) {
		return x > LLONG_MAX - y;
	}
	if (x < 0 && y < 0) {
		return x < LLONG_MIN - y;
	}
	return 0;
}

static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	if (fl->l_whence == SEEK_CUR) {
		/* Check if the start marker is beyond LLONG_MAX. */
		if (int64_add_overflows(fl->l_start, cur_file_offset)) {
			/* A negative l_start is a bad start, not an overflow. */
			return (fl->l_start < 0) ? EINVAL : EOVERFLOW;
		}
		/* Check if the start marker is negative. */
		if (fl->l_start + cur_file_offset < 0) {
			return EINVAL;
		}
		/* Check if end marker is beyond LLONG_MAX. */
		if ((fl->l_len > 0) && int64_add_overflows(fl->l_start +
		    cur_file_offset, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* Check if the end marker is negative. */
		if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
		    fl->l_len < 0)) {
			return EINVAL;
		}
	} else if (fl->l_whence == SEEK_SET) {
		/* Check if the start marker is negative. */
		if (fl->l_start < 0) {
			return EINVAL;
		}
		/* Check if the end marker is beyond LLONG_MAX. */
		if ((fl->l_len > 0) &&
		    int64_add_overflows(fl->l_start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* Check if the end marker is negative. */
		if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
			return EINVAL;
		}
	}
	/* SEEK_END (and any other whence) is validated elsewhere. */
	return 0;
}
2359 
2360 
/*
 * sys_dup
 *
 * Description:	Duplicate a file descriptor.
 *
 * Parameters:	p				Process performing the dup
 *		uap->fd				The fd to dup
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno
 *
 * Implicit returns:
 *		*retval (modified)		The new descriptor
 */
int
sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
{
	struct filedesc *fdp = &p->p_fd;
	int old = uap->fd;
	int new, error;
	struct fileproc *fp;

	proc_fdlock(p);
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded fds may forbid duplication entirely. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* Allocate the lowest available descriptor slot. */
	if ((error = fdalloc(p, 0, &new))) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	/* NOTE(review): fp is referenced here after fp_drop/unlock —
	 * presumably safe because the fd table still holds the fileproc
	 * alive at this point; confirm against finishdup semantics. */
	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
	}

	return error;
}
2411 
/*
 * sys_dup2
 *
 * Description:	Duplicate a file descriptor to a particular value.
 *
 * Parameters:	p				Process performing the dup
 *		uap->from			The fd to dup
 *		uap->to				The fd to dup it to
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno
 *
 * Implicit returns:
 *		*retval (modified)		The new descriptor
 */
int
sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
{
	/* Thin syscall wrapper: all the work happens in dup2() below. */
	return dup2(p, uap->from, uap->to, retval);
}
2433 
/*
 * dup2
 *
 * Description:	Kernel implementation of dup2(2): make descriptor `new`
 *		refer to the same open file as `old`, closing whatever
 *		`new` previously referenced.
 *
 * Parameters:	p		Process performing the dup
 *		old		Source descriptor
 *		new		Target descriptor number
 *		retval		Out: the resulting descriptor (== new)
 *
 * Returns:	0		Success
 *		EBADF		old invalid, or new out of range
 *		!0		Errno from fdalloc/guard/finishdup
 *
 * Locks:	Takes and drops the proc_fdlock; may drop and retake it
 *		while waiting on a reserved target slot or closing the
 *		previous occupant of `new`.
 */
int
dup2(proc_t p, int old, int new, int *retval)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;

	proc_fdlock(p);

	/* Re-entered whenever we had to sleep waiting for `new` to become
	 * unreserved; `old` must be revalidated from scratch each time. */
startover:
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded fds may forbid duplication entirely. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* dup2(fd, fd) is a no-op that just reports the fd back. */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* Target is beyond the current table: grow/allocate it. */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		/* fdalloc gave back a different slot: release it and fall
		 * through to the close-and-reserve path for `new`. */
		if (new != i) {
			fdrelse(p, i);
			goto closeit;
		}
	} else {
		/* NOTE: `closeit` is jumped to from the branch above; the
		 * label deliberately sits inside this else block. */
closeit:
		/* Someone else holds `new` reserved (e.g. a close in
		 * progress): sleep until it settles, then start over. */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* Cannot implicitly close a close-guarded fd. */
			if (fp_isguarded(nfp, GUARD_CLOSE)) {
				fp_drop(p, old, fp, 1);
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/* Close the previous occupant; FD_DUP2RESV keeps the
			 * slot reserved for us across the unlock. */
			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			procfdtbl_reservefd(p, new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	/* Slot `new` is empty and reserved: install the duplicate. */
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
2520 
2521 
/*
 * fcntl
 *
 * Description:	The file control system call.
 *
 * Parameters:	p				Process performing the fcntl
 *		uap->fd				The fd to operate against
 *		uap->cmd			The command to perform
 *		uap->arg			Pointer to the command argument
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno (see fcntl_nocancel)
 *
 * Implicit returns:
 *		*retval (modified)		fcntl return value (if any)
 *
 * Notes:	This system call differs from fcntl_nocancel() in that it
 *		tests for cancellation prior to performing a potentially
 *		blocking operation.
 */
int
sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
{
	/* Cancellation point: check for pending pthread cancellation first. */
	__pthread_testcancel(1);
	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
}
2549 
/*
 * Entitlement allowing F_OPENFROM to perform the open with the
 * credentials of the thread that opened the directory fd (checked via
 * IOCurrentTaskHasEntitlement() in sys_fcntl__OPENFROM()).
 */
#define ACCOUNT_OPENFROM_ENTITLEMENT \
	"com.apple.private.vfs.role-account-openfrom"
2552 
2553 /*
2554  * sys_fcntl_nocancel
2555  *
2556  * Description:	A non-cancel-testing file control system call.
2557  *
2558  * Parameters:	p				Process performing the fcntl
2559  *		uap->fd				The fd to operate against
2560  *		uap->cmd			The command to perform
2561  *		uap->arg			Pointer to the command argument
2562  *		retval				Pointer to the call return area
2563  *
2564  * Returns:	0				Success
2565  *		EINVAL
2566  *	fp_lookup:EBADF				Bad file descriptor
2567  * [F_DUPFD]
2568  *	fdalloc:EMFILE
2569  *	fdalloc:ENOMEM
2570  *	finishdup:EBADF
2571  *	finishdup:ENOMEM
2572  * [F_SETOWN]
2573  *		ESRCH
2574  * [F_SETLK]
2575  *		EBADF
2576  *		EOVERFLOW
2577  *	copyin:EFAULT
2578  *	vnode_getwithref:???
2579  *	VNOP_ADVLOCK:???
2580  *	msleep:ETIMEDOUT
2581  * [F_GETLK]
2582  *		EBADF
2583  *		EOVERFLOW
2584  *	copyin:EFAULT
2585  *	copyout:EFAULT
2586  *	vnode_getwithref:???
2587  *	VNOP_ADVLOCK:???
2588  * [F_PREALLOCATE]
2589  *		EBADF
2590  *		EINVAL
2591  *	copyin:EFAULT
2592  *	copyout:EFAULT
2593  *	vnode_getwithref:???
2594  *	VNOP_ALLOCATE:???
2595  * [F_SETSIZE,F_RDADVISE]
2596  *		EBADF
2597  *		EINVAL
2598  *	copyin:EFAULT
2599  *	vnode_getwithref:???
2600  * [F_RDAHEAD,F_NOCACHE]
2601  *		EBADF
2602  *	vnode_getwithref:???
2603  * [???]
2604  *
2605  * Implicit returns:
2606  *		*retval (modified)		fcntl return value (if any)
2607  */
/*
 * Declare an on-stack vfs_context named `context`, built from the calling
 * thread and the credential of the fileproc `fp` in scope at the
 * expansion site.
 */
#define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
	struct vfs_context context = { \
	    .vc_thread = current_thread(), \
	    .vc_ucred = fp->f_cred, \
	}
2613 
2614 static user_addr_t
sys_fnctl_parse_arg(proc_t p,user_long_t arg)2615 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2616 {
2617 	/*
2618 	 * Since the arg parameter is defined as a long but may be
2619 	 * either a long or a pointer we must take care to handle
2620 	 * sign extension issues.  Our sys call munger will sign
2621 	 * extend a long when we are called from a 32-bit process.
2622 	 * Since we can never have an address greater than 32-bits
2623 	 * from a 32-bit process we lop off the top 32-bits to avoid
2624 	 * getting the wrong address
2625 	 */
2626 	return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2627 }
2628 
/*
 * Cleanup code common to fnctl functions, for when the fdlock is still
 * held: drop the fp reference taken by fp_lookup (locked variant),
 * release the fdlock, and pass `error` through to the caller.
 */
static int
sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
{
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
2637 
/*
 * Cleanup code common to fnctl handlers acting on vnodes, once they have
 * already unlocked the fdlock: audit the vnode path, drop the fp
 * reference (unlocked variant), and pass `error` through.
 */
static int
sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
{
#pragma unused(vp)

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	fp_drop(p, fd, fp, 0);
	return error;
}
2648 
/* Handler signature for an fcntl command operating on a generic fd. */
typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
    struct fileproc *fp, int32_t *retval);

/* As above, for commands that additionally receive the backing vnode. */
typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
    user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2654 
2655 /*
2656  * SPI (private) for opening a file starting from a dir fd
2657  *
2658  * Note: do not inline to keep stack usage under control.
2659  */
2660 __attribute__((noinline))
2661 static int
sys_fcntl__OPENFROM(proc_t p,int fd,int cmd,user_long_t arg,struct fileproc * fp,struct vnode * vp,int32_t * retval)2662 sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
2663     struct fileproc *fp, struct vnode *vp, int32_t *retval)
2664 {
2665 #pragma unused(cmd)
2666 
2667 	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
2668 	struct user_fopenfrom fopen;
2669 	struct vnode_attr *va;
2670 	struct nameidata *nd;
2671 	int error, cmode;
2672 	bool has_entitlement;
2673 
2674 	/* Check if this isn't a valid file descriptor */
2675 	if ((fp->f_flag & FREAD) == 0) {
2676 		return sys_fcntl_out(p, fd, fp, EBADF);
2677 	}
2678 	proc_fdunlock(p);
2679 
2680 	if (vnode_getwithref(vp)) {
2681 		error = ENOENT;
2682 		goto outdrop;
2683 	}
2684 
2685 	/* Only valid for directories */
2686 	if (vp->v_type != VDIR) {
2687 		vnode_put(vp);
2688 		error = ENOTDIR;
2689 		goto outdrop;
2690 	}
2691 
2692 	/*
2693 	 * Only entitled apps may use the credentials of the thread
2694 	 * that opened the file descriptor.
2695 	 * Non-entitled threads will use their own context.
2696 	 */
2697 	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);
2698 
2699 	/* Get flags, mode and pathname arguments. */
2700 	if (IS_64BIT_PROCESS(p)) {
2701 		error = copyin(argp, &fopen, sizeof(fopen));
2702 	} else {
2703 		struct user32_fopenfrom fopen32;
2704 
2705 		error = copyin(argp, &fopen32, sizeof(fopen32));
2706 		fopen.o_flags = fopen32.o_flags;
2707 		fopen.o_mode = fopen32.o_mode;
2708 		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2709 	}
2710 	if (error) {
2711 		vnode_put(vp);
2712 		goto outdrop;
2713 	}
2714 
2715 	/* open1() can have really deep stacks, so allocate those */
2716 	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2717 	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2718 
2719 	AUDIT_ARG(fflags, fopen.o_flags);
2720 	AUDIT_ARG(mode, fopen.o_mode);
2721 	VATTR_INIT(va);
2722 	/* Mask off all but regular access permissions */
2723 	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
2724 	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);
2725 
2726 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2727 
2728 	/* Start the lookup relative to the file descriptor's vnode. */
2729 	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2730 	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2731 	nd->ni_dvp = vp;
2732 
2733 	error = open1(has_entitlement ? &context : vfs_context_current(),
2734 	    nd, fopen.o_flags, va, NULL, NULL, retval);
2735 
2736 	kfree_type(struct vnode_attr, va);
2737 	kfree_type(struct nameidata, nd);
2738 
2739 	vnode_put(vp);
2740 
2741 outdrop:
2742 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
2743 }
2744 
2745 int
sys_fcntl_nocancel(proc_t p,struct fcntl_nocancel_args * uap,int32_t * retval)2746 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2747 {
2748 	int fd = uap->fd;
2749 	int cmd = uap->cmd;
2750 	struct filedesc *fdp = &p->p_fd;
2751 	struct fileproc *fp;
2752 	struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
2753 	unsigned int oflags, nflags;
2754 	int i, tmp, error, error2, flg = 0;
2755 	struct flock fl = {};
2756 	struct flocktimeout fltimeout;
2757 	struct timespec *timeout = NULL;
2758 	off_t offset;
2759 	int newmin;
2760 	daddr64_t lbn, bn;
2761 	unsigned int fflag;
2762 	user_addr_t argp;
2763 	boolean_t is64bit;
2764 	int has_entitlement = 0;
2765 
2766 	AUDIT_ARG(fd, uap->fd);
2767 	AUDIT_ARG(cmd, uap->cmd);
2768 
2769 	proc_fdlock(p);
2770 	if ((error = fp_lookup(p, fd, &fp, 1))) {
2771 		proc_fdunlock(p);
2772 		return error;
2773 	}
2774 
2775 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2776 
2777 	is64bit = proc_is64bit(p);
2778 	if (is64bit) {
2779 		argp = uap->arg;
2780 	} else {
2781 		/*
2782 		 * Since the arg parameter is defined as a long but may be
2783 		 * either a long or a pointer we must take care to handle
2784 		 * sign extension issues.  Our sys call munger will sign
2785 		 * extend a long when we are called from a 32-bit process.
2786 		 * Since we can never have an address greater than 32-bits
2787 		 * from a 32-bit process we lop off the top 32-bits to avoid
2788 		 * getting the wrong address
2789 		 */
2790 		argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2791 	}
2792 
2793 #if CONFIG_MACF
2794 	error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2795 	if (error) {
2796 		goto out;
2797 	}
2798 #endif
2799 
2800 	switch (cmd) {
2801 	case F_DUPFD:
2802 	case F_DUPFD_CLOEXEC:
2803 		if (fp_isguarded(fp, GUARD_DUP)) {
2804 			error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2805 			goto out;
2806 		}
2807 		newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2808 		AUDIT_ARG(value32, newmin);
2809 		if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2810 			error = EINVAL;
2811 			goto out;
2812 		}
2813 		if ((error = fdalloc(p, newmin, &i))) {
2814 			goto out;
2815 		}
2816 		error = finishdup(p, fdp, fd, i,
2817 		    cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2818 		goto out;
2819 
2820 	case F_GETFD:
2821 		*retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2822 		error = 0;
2823 		goto out;
2824 
2825 	case F_SETFD:
2826 		AUDIT_ARG(value32, (uint32_t)uap->arg);
2827 		if (uap->arg & FD_CLOEXEC) {
2828 			fp->fp_flags |= FP_CLOEXEC;
2829 			error = 0;
2830 		} else if (!fp->fp_guard_attrs) {
2831 			fp->fp_flags &= ~FP_CLOEXEC;
2832 			error = 0;
2833 		} else {
2834 			error = fp_guard_exception(p,
2835 			    fd, fp, kGUARD_EXC_NOCLOEXEC);
2836 		}
2837 		goto out;
2838 
2839 	case F_GETFL:
2840 		*retval = OFLAGS(fp->f_flag);
2841 		error = 0;
2842 		goto out;
2843 
2844 	case F_SETFL:
2845 		// FIXME (rdar://54898652)
2846 		//
2847 		// this code is broken if fnctl(F_SETFL), ioctl() are
2848 		// called concurrently for the same fileglob.
2849 
2850 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2851 		AUDIT_ARG(value32, tmp);
2852 
2853 		os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2854 			nflags  = oflags & ~FCNTLFLAGS;
2855 			nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2856 		});
2857 		tmp = nflags & FNONBLOCK;
2858 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2859 		if (error) {
2860 			goto out;
2861 		}
2862 		tmp = nflags & FASYNC;
2863 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2864 		if (!error) {
2865 			goto out;
2866 		}
2867 		os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2868 		tmp = 0;
2869 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2870 		goto out;
2871 
2872 	case F_GETOWN:
2873 		if (fp->f_type == DTYPE_SOCKET) {
2874 			*retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2875 			error = 0;
2876 			goto out;
2877 		}
2878 		error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2879 		*retval = -*retval;
2880 		goto out;
2881 
2882 	case F_SETOWN:
2883 		tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2884 		AUDIT_ARG(value32, tmp);
2885 		if (fp->f_type == DTYPE_SOCKET) {
2886 			((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2887 			error = 0;
2888 			goto out;
2889 		}
2890 		if (fp->f_type == DTYPE_PIPE) {
2891 			error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2892 			goto out;
2893 		}
2894 
2895 		if (tmp <= 0) {
2896 			tmp = -tmp;
2897 		} else {
2898 			proc_t p1 = proc_find(tmp);
2899 			if (p1 == 0) {
2900 				error = ESRCH;
2901 				goto out;
2902 			}
2903 			tmp = (int)p1->p_pgrpid;
2904 			proc_rele(p1);
2905 		}
2906 		error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2907 		goto out;
2908 
2909 	case F_SETNOSIGPIPE:
2910 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2911 		if (fp->f_type == DTYPE_SOCKET) {
2912 #if SOCKETS
2913 			error = sock_setsockopt((struct socket *)fp_get_data(fp),
2914 			    SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2915 #else
2916 			error = EINVAL;
2917 #endif
2918 		} else {
2919 			struct fileglob *fg = fp->fp_glob;
2920 
2921 			lck_mtx_lock_spin(&fg->fg_lock);
2922 			if (tmp) {
2923 				fg->fg_lflags |= FG_NOSIGPIPE;
2924 			} else {
2925 				fg->fg_lflags &= ~FG_NOSIGPIPE;
2926 			}
2927 			lck_mtx_unlock(&fg->fg_lock);
2928 			error = 0;
2929 		}
2930 		goto out;
2931 
2932 	case F_GETNOSIGPIPE:
2933 		if (fp->f_type == DTYPE_SOCKET) {
2934 #if SOCKETS
2935 			int retsize = sizeof(*retval);
2936 			error = sock_getsockopt((struct socket *)fp_get_data(fp),
2937 			    SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2938 #else
2939 			error = EINVAL;
2940 #endif
2941 		} else {
2942 			*retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2943 			    1 : 0;
2944 			error = 0;
2945 		}
2946 		goto out;
2947 
2948 	case F_SETCONFINED:
2949 		/*
2950 		 * If this is the only reference to this fglob in the process
2951 		 * and it's already marked as close-on-fork then mark it as
2952 		 * (immutably) "confined" i.e. any fd that points to it will
2953 		 * forever be close-on-fork, and attempts to use an IPC
2954 		 * mechanism to move the descriptor elsewhere will fail.
2955 		 */
2956 		if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2957 			struct fileglob *fg = fp->fp_glob;
2958 
2959 			lck_mtx_lock_spin(&fg->fg_lock);
2960 			if (fg->fg_lflags & FG_CONFINED) {
2961 				error = 0;
2962 			} else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2963 				error = EAGAIN; /* go close the dup .. */
2964 			} else if (fp->fp_flags & FP_CLOFORK) {
2965 				fg->fg_lflags |= FG_CONFINED;
2966 				error = 0;
2967 			} else {
2968 				error = EBADF;  /* open without O_CLOFORK? */
2969 			}
2970 			lck_mtx_unlock(&fg->fg_lock);
2971 		} else {
2972 			/*
2973 			 * Other subsystems may have built on the immutability
2974 			 * of FG_CONFINED; clearing it may be tricky.
2975 			 */
2976 			error = EPERM;          /* immutable */
2977 		}
2978 		goto out;
2979 
2980 	case F_GETCONFINED:
2981 		*retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2982 		error = 0;
2983 		goto out;
2984 
2985 	case F_SETLKWTIMEOUT:
2986 	case F_SETLKW:
2987 	case F_OFD_SETLKWTIMEOUT:
2988 	case F_OFD_SETLKW:
2989 		flg |= F_WAIT;
2990 		OS_FALLTHROUGH;
2991 
2992 	case F_SETLK:
2993 	case F_OFD_SETLK:
2994 		if (fp->f_type != DTYPE_VNODE) {
2995 			error = EBADF;
2996 			goto out;
2997 		}
2998 		vp = (struct vnode *)fp_get_data(fp);
2999 
3000 		fflag = fp->f_flag;
3001 		offset = fp->f_offset;
3002 		proc_fdunlock(p);
3003 
3004 		/* Copy in the lock structure */
3005 		if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3006 			error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3007 			if (error) {
3008 				goto outdrop;
3009 			}
3010 			fl = fltimeout.fl;
3011 			timeout = &fltimeout.timeout;
3012 		} else {
3013 			error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3014 			if (error) {
3015 				goto outdrop;
3016 			}
3017 		}
3018 
3019 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3020 		/* and ending byte for EOVERFLOW in SEEK_SET */
3021 		error = check_file_seek_range(&fl, offset);
3022 		if (error) {
3023 			goto outdrop;
3024 		}
3025 
3026 		if ((error = vnode_getwithref(vp))) {
3027 			goto outdrop;
3028 		}
3029 		if (fl.l_whence == SEEK_CUR) {
3030 			fl.l_start += offset;
3031 		}
3032 
3033 #if CONFIG_MACF
3034 		error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3035 		    F_SETLK, &fl);
3036 		if (error) {
3037 			(void)vnode_put(vp);
3038 			goto outdrop;
3039 		}
3040 #endif
3041 		switch (cmd) {
3042 		case F_OFD_SETLK:
3043 		case F_OFD_SETLKW:
3044 		case F_OFD_SETLKWTIMEOUT:
3045 			flg |= F_OFD_LOCK;
3046 			switch (fl.l_type) {
3047 			case F_RDLCK:
3048 				if ((fflag & FREAD) == 0) {
3049 					error = EBADF;
3050 					break;
3051 				}
3052 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3053 				    F_SETLK, &fl, flg, &context, timeout);
3054 				break;
3055 			case F_WRLCK:
3056 				if ((fflag & FWRITE) == 0) {
3057 					error = EBADF;
3058 					break;
3059 				}
3060 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3061 				    F_SETLK, &fl, flg, &context, timeout);
3062 				break;
3063 			case F_UNLCK:
3064 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3065 				    F_UNLCK, &fl, F_OFD_LOCK, &context,
3066 				    timeout);
3067 				break;
3068 			default:
3069 				error = EINVAL;
3070 				break;
3071 			}
3072 			if (0 == error &&
3073 			    (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3074 				struct fileglob *fg = fp->fp_glob;
3075 
3076 				/*
3077 				 * arrange F_UNLCK on last close (once
3078 				 * set, FG_HAS_OFDLOCK is immutable)
3079 				 */
3080 				if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3081 					lck_mtx_lock_spin(&fg->fg_lock);
3082 					fg->fg_lflags |= FG_HAS_OFDLOCK;
3083 					lck_mtx_unlock(&fg->fg_lock);
3084 				}
3085 			}
3086 			break;
3087 		default:
3088 			flg |= F_POSIX;
3089 			switch (fl.l_type) {
3090 			case F_RDLCK:
3091 				if ((fflag & FREAD) == 0) {
3092 					error = EBADF;
3093 					break;
3094 				}
3095 				// XXX UInt32 unsafe for LP64 kernel
3096 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3097 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3098 				    F_SETLK, &fl, flg, &context, timeout);
3099 				break;
3100 			case F_WRLCK:
3101 				if ((fflag & FWRITE) == 0) {
3102 					error = EBADF;
3103 					break;
3104 				}
3105 				// XXX UInt32 unsafe for LP64 kernel
3106 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3107 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3108 				    F_SETLK, &fl, flg, &context, timeout);
3109 				break;
3110 			case F_UNLCK:
3111 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3112 				    F_UNLCK, &fl, F_POSIX, &context, timeout);
3113 				break;
3114 			default:
3115 				error = EINVAL;
3116 				break;
3117 			}
3118 			break;
3119 		}
3120 		(void) vnode_put(vp);
3121 		goto outdrop;
3122 
3123 	case F_GETLK:
3124 	case F_OFD_GETLK:
3125 	case F_GETLKPID:
3126 	case F_OFD_GETLKPID:
3127 		if (fp->f_type != DTYPE_VNODE) {
3128 			error = EBADF;
3129 			goto out;
3130 		}
3131 		vp = (struct vnode *)fp_get_data(fp);
3132 
3133 		offset = fp->f_offset;
3134 		proc_fdunlock(p);
3135 
3136 		/* Copy in the lock structure */
3137 		error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3138 		if (error) {
3139 			goto outdrop;
3140 		}
3141 
3142 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3143 		/* and ending byte for EOVERFLOW in SEEK_SET */
3144 		error = check_file_seek_range(&fl, offset);
3145 		if (error) {
3146 			goto outdrop;
3147 		}
3148 
3149 		if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3150 			error = EINVAL;
3151 			goto outdrop;
3152 		}
3153 
3154 		switch (fl.l_type) {
3155 		case F_RDLCK:
3156 		case F_UNLCK:
3157 		case F_WRLCK:
3158 			break;
3159 		default:
3160 			error = EINVAL;
3161 			goto outdrop;
3162 		}
3163 
3164 		switch (fl.l_whence) {
3165 		case SEEK_CUR:
3166 		case SEEK_SET:
3167 		case SEEK_END:
3168 			break;
3169 		default:
3170 			error = EINVAL;
3171 			goto outdrop;
3172 		}
3173 
3174 		if ((error = vnode_getwithref(vp)) == 0) {
3175 			if (fl.l_whence == SEEK_CUR) {
3176 				fl.l_start += offset;
3177 			}
3178 
3179 #if CONFIG_MACF
3180 			error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3181 			    cmd, &fl);
3182 			if (error == 0)
3183 #endif
3184 			switch (cmd) {
3185 			case F_OFD_GETLK:
3186 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3187 				    F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3188 				break;
3189 			case F_OFD_GETLKPID:
3190 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3191 				    F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3192 				break;
3193 			default:
3194 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3195 				    cmd, &fl, F_POSIX, &context, NULL);
3196 				break;
3197 			}
3198 
3199 			(void)vnode_put(vp);
3200 
3201 			if (error == 0) {
3202 				error = copyout((caddr_t)&fl, argp, sizeof(fl));
3203 			}
3204 		}
3205 		goto outdrop;
3206 
3207 	case F_PREALLOCATE: {
3208 		fstore_t alloc_struct;    /* structure for allocate command */
3209 		u_int32_t alloc_flags = 0;
3210 
3211 		if (fp->f_type != DTYPE_VNODE) {
3212 			error = EBADF;
3213 			goto out;
3214 		}
3215 
3216 		vp = (struct vnode *)fp_get_data(fp);
3217 		proc_fdunlock(p);
3218 
3219 		/* make sure that we have write permission */
3220 		if ((fp->f_flag & FWRITE) == 0) {
3221 			error = EBADF;
3222 			goto outdrop;
3223 		}
3224 
3225 		error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3226 		if (error) {
3227 			goto outdrop;
3228 		}
3229 
3230 		/* now set the space allocated to 0 */
3231 		alloc_struct.fst_bytesalloc = 0;
3232 
3233 		/*
3234 		 * Do some simple parameter checking
3235 		 */
3236 
3237 		/* set up the flags */
3238 
3239 		alloc_flags |= PREALLOCATE;
3240 
3241 		if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3242 			alloc_flags |= ALLOCATECONTIG;
3243 		}
3244 
3245 		if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3246 			alloc_flags |= ALLOCATEALL;
3247 		}
3248 
3249 		/*
3250 		 * Do any position mode specific stuff.  The only
3251 		 * position mode  supported now is PEOFPOSMODE
3252 		 */
3253 
3254 		switch (alloc_struct.fst_posmode) {
3255 		case F_PEOFPOSMODE:
3256 			if (alloc_struct.fst_offset != 0) {
3257 				error = EINVAL;
3258 				goto outdrop;
3259 			}
3260 
3261 			alloc_flags |= ALLOCATEFROMPEOF;
3262 			break;
3263 
3264 		case F_VOLPOSMODE:
3265 			if (alloc_struct.fst_offset <= 0) {
3266 				error = EINVAL;
3267 				goto outdrop;
3268 			}
3269 
3270 			alloc_flags |= ALLOCATEFROMVOL;
3271 			break;
3272 
3273 		default: {
3274 			error = EINVAL;
3275 			goto outdrop;
3276 		}
3277 		}
3278 		if ((error = vnode_getwithref(vp)) == 0) {
3279 			/*
3280 			 * call allocate to get the space
3281 			 */
3282 			error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3283 			    &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3284 			    &context);
3285 			(void)vnode_put(vp);
3286 
3287 			error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3288 
3289 			if (error == 0) {
3290 				error = error2;
3291 			}
3292 		}
3293 		goto outdrop;
3294 	}
3295 	case F_PUNCHHOLE: {
3296 		fpunchhole_t args;
3297 
3298 		if (fp->f_type != DTYPE_VNODE) {
3299 			error = EBADF;
3300 			goto out;
3301 		}
3302 
3303 		vp = (struct vnode *)fp_get_data(fp);
3304 		proc_fdunlock(p);
3305 
3306 		/* need write permissions */
3307 		if ((fp->f_flag & FWRITE) == 0) {
3308 			error = EPERM;
3309 			goto outdrop;
3310 		}
3311 
3312 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3313 			goto outdrop;
3314 		}
3315 
3316 		if ((error = vnode_getwithref(vp))) {
3317 			goto outdrop;
3318 		}
3319 
3320 #if CONFIG_MACF
3321 		if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
3322 			(void)vnode_put(vp);
3323 			goto outdrop;
3324 		}
3325 #endif
3326 
3327 		error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
3328 		(void)vnode_put(vp);
3329 
3330 		goto outdrop;
3331 	}
3332 	case F_TRIM_ACTIVE_FILE: {
3333 		ftrimactivefile_t args;
3334 
3335 		if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
3336 			error = EACCES;
3337 			goto out;
3338 		}
3339 
3340 		if (fp->f_type != DTYPE_VNODE) {
3341 			error = EBADF;
3342 			goto out;
3343 		}
3344 
3345 		vp = (struct vnode *)fp_get_data(fp);
3346 		proc_fdunlock(p);
3347 
3348 		/* need write permissions */
3349 		if ((fp->f_flag & FWRITE) == 0) {
3350 			error = EPERM;
3351 			goto outdrop;
3352 		}
3353 
3354 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3355 			goto outdrop;
3356 		}
3357 
3358 		if ((error = vnode_getwithref(vp))) {
3359 			goto outdrop;
3360 		}
3361 
3362 		error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
3363 		(void)vnode_put(vp);
3364 
3365 		goto outdrop;
3366 	}
3367 	case F_SPECULATIVE_READ: {
3368 		fspecread_t args;
3369 		off_t temp_length = 0;
3370 
3371 		if (fp->f_type != DTYPE_VNODE) {
3372 			error = EBADF;
3373 			goto out;
3374 		}
3375 
3376 		vp = (struct vnode *)fp_get_data(fp);
3377 		proc_fdunlock(p);
3378 
3379 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3380 			goto outdrop;
3381 		}
3382 
3383 		/* Discard invalid offsets or lengths */
3384 		if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3385 			error = EINVAL;
3386 			goto outdrop;
3387 		}
3388 
3389 		/*
3390 		 * Round the file offset down to a page-size boundary (or to 0).
3391 		 * The filesystem will need to round the length up to the end of the page boundary
3392 		 * or to the EOF of the file.
3393 		 */
3394 		uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3395 		uint64_t foff_delta = args.fsr_offset - foff;
3396 		args.fsr_offset = (off_t) foff;
3397 
3398 		/*
3399 		 * Now add in the delta to the supplied length. Since we may have adjusted the
3400 		 * offset, increase it by the amount that we adjusted.
3401 		 */
3402 		if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3403 			error = EOVERFLOW;
3404 			goto outdrop;
3405 		}
3406 
3407 		/*
3408 		 * Make sure (fsr_offset + fsr_length) does not overflow.
3409 		 */
3410 		if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3411 			error = EOVERFLOW;
3412 			goto outdrop;
3413 		}
3414 
3415 		if ((error = vnode_getwithref(vp))) {
3416 			goto outdrop;
3417 		}
3418 		error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
3419 		(void)vnode_put(vp);
3420 
3421 		goto outdrop;
3422 	}
3423 	case F_SETSIZE:
3424 		if (fp->f_type != DTYPE_VNODE) {
3425 			error = EBADF;
3426 			goto out;
3427 		}
3428 		vp = (struct vnode *)fp_get_data(fp);
3429 		proc_fdunlock(p);
3430 
3431 		error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
3432 		if (error) {
3433 			goto outdrop;
3434 		}
3435 		AUDIT_ARG(value64, offset);
3436 
3437 		error = vnode_getwithref(vp);
3438 		if (error) {
3439 			goto outdrop;
3440 		}
3441 
3442 #if CONFIG_MACF
3443 		error = mac_vnode_check_truncate(&context,
3444 		    fp->fp_glob->fg_cred, vp);
3445 		if (error) {
3446 			(void)vnode_put(vp);
3447 			goto outdrop;
3448 		}
3449 #endif
3450 		/*
3451 		 * Make sure that we are root.  Growing a file
3452 		 * without zero filling the data is a security hole.
3453 		 */
3454 		if (!kauth_cred_issuser(kauth_cred_get())) {
3455 			error = EACCES;
3456 		} else {
3457 			/*
3458 			 * Require privilege to change file size without zerofill,
3459 			 * else will change the file size and zerofill it.
3460 			 */
3461 			error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
3462 			if (error == 0) {
3463 				error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
3464 			} else {
3465 				error = vnode_setsize(vp, offset, 0, &context);
3466 			}
3467 
3468 #if CONFIG_MACF
3469 			if (error == 0) {
3470 				mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
3471 			}
3472 #endif
3473 		}
3474 
3475 		(void)vnode_put(vp);
3476 		goto outdrop;
3477 
3478 	case F_RDAHEAD:
3479 		if (fp->f_type != DTYPE_VNODE) {
3480 			error = EBADF;
3481 			goto out;
3482 		}
3483 		if (uap->arg) {
3484 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3485 		} else {
3486 			os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3487 		}
3488 		goto out;
3489 
3490 	case F_NOCACHE:
3491 		if (fp->f_type != DTYPE_VNODE) {
3492 			error = EBADF;
3493 			goto out;
3494 		}
3495 		if (uap->arg) {
3496 			os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3497 		} else {
3498 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3499 		}
3500 		goto out;
3501 
3502 	case F_NODIRECT:
3503 		if (fp->f_type != DTYPE_VNODE) {
3504 			error = EBADF;
3505 			goto out;
3506 		}
3507 		if (uap->arg) {
3508 			os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3509 		} else {
3510 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3511 		}
3512 		goto out;
3513 
3514 	case F_SINGLE_WRITER:
3515 		if (fp->f_type != DTYPE_VNODE) {
3516 			error = EBADF;
3517 			goto out;
3518 		}
3519 		if (uap->arg) {
3520 			os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3521 		} else {
3522 			os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3523 		}
3524 		goto out;
3525 
3526 	case F_GLOBAL_NOCACHE:
3527 		if (fp->f_type != DTYPE_VNODE) {
3528 			error = EBADF;
3529 			goto out;
3530 		}
3531 		vp = (struct vnode *)fp_get_data(fp);
3532 		proc_fdunlock(p);
3533 
3534 		if ((error = vnode_getwithref(vp)) == 0) {
3535 			*retval = vnode_isnocache(vp);
3536 
3537 			if (uap->arg) {
3538 				vnode_setnocache(vp);
3539 			} else {
3540 				vnode_clearnocache(vp);
3541 			}
3542 
3543 			(void)vnode_put(vp);
3544 		}
3545 		goto outdrop;
3546 
3547 	case F_CHECK_OPENEVT:
3548 		if (fp->f_type != DTYPE_VNODE) {
3549 			error = EBADF;
3550 			goto out;
3551 		}
3552 		vp = (struct vnode *)fp_get_data(fp);
3553 		proc_fdunlock(p);
3554 
3555 		if ((error = vnode_getwithref(vp)) == 0) {
3556 			*retval = vnode_is_openevt(vp);
3557 
3558 			if (uap->arg) {
3559 				vnode_set_openevt(vp);
3560 			} else {
3561 				vnode_clear_openevt(vp);
3562 			}
3563 
3564 			(void)vnode_put(vp);
3565 		}
3566 		goto outdrop;
3567 
3568 	case F_RDADVISE: {
3569 		struct radvisory ra_struct;
3570 
3571 		if (fp->f_type != DTYPE_VNODE) {
3572 			error = EBADF;
3573 			goto out;
3574 		}
3575 		vp = (struct vnode *)fp_get_data(fp);
3576 		proc_fdunlock(p);
3577 
3578 		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
3579 			goto outdrop;
3580 		}
3581 		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
3582 			error = EINVAL;
3583 			goto outdrop;
3584 		}
3585 		if ((error = vnode_getwithref(vp)) == 0) {
3586 			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
3587 
3588 			(void)vnode_put(vp);
3589 		}
3590 		goto outdrop;
3591 	}
3592 
3593 	case F_FLUSH_DATA:
3594 
3595 		if (fp->f_type != DTYPE_VNODE) {
3596 			error = EBADF;
3597 			goto out;
3598 		}
3599 		vp = (struct vnode *)fp_get_data(fp);
3600 		proc_fdunlock(p);
3601 
3602 		if ((error = vnode_getwithref(vp)) == 0) {
3603 			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
3604 
3605 			(void)vnode_put(vp);
3606 		}
3607 		goto outdrop;
3608 
3609 	case F_LOG2PHYS:
3610 	case F_LOG2PHYS_EXT: {
3611 		struct log2phys l2p_struct = {};    /* structure for allocate command */
3612 		int devBlockSize;
3613 
3614 		off_t file_offset = 0;
3615 		size_t a_size = 0;
3616 		size_t run = 0;
3617 
3618 		if (cmd == F_LOG2PHYS_EXT) {
3619 			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
3620 			if (error) {
3621 				goto out;
3622 			}
3623 			file_offset = l2p_struct.l2p_devoffset;
3624 		} else {
3625 			file_offset = fp->f_offset;
3626 		}
3627 		if (fp->f_type != DTYPE_VNODE) {
3628 			error = EBADF;
3629 			goto out;
3630 		}
3631 		vp = (struct vnode *)fp_get_data(fp);
3632 		proc_fdunlock(p);
3633 		if ((error = vnode_getwithref(vp))) {
3634 			goto outdrop;
3635 		}
3636 		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
3637 		if (error) {
3638 			(void)vnode_put(vp);
3639 			goto outdrop;
3640 		}
3641 		error = VNOP_BLKTOOFF(vp, lbn, &offset);
3642 		if (error) {
3643 			(void)vnode_put(vp);
3644 			goto outdrop;
3645 		}
3646 		devBlockSize = vfs_devblocksize(vnode_mount(vp));
3647 		if (cmd == F_LOG2PHYS_EXT) {
3648 			if (l2p_struct.l2p_contigbytes < 0) {
3649 				vnode_put(vp);
3650 				error = EINVAL;
3651 				goto outdrop;
3652 			}
3653 
3654 			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
3655 		} else {
3656 			a_size = devBlockSize;
3657 		}
3658 
3659 		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
3660 
3661 		(void)vnode_put(vp);
3662 
3663 		if (!error) {
3664 			l2p_struct.l2p_flags = 0;       /* for now */
3665 			if (cmd == F_LOG2PHYS_EXT) {
3666 				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
3667 			} else {
3668 				l2p_struct.l2p_contigbytes = 0; /* for now */
3669 			}
3670 
3671 			/*
3672 			 * The block number being -1 suggests that the file offset is not backed
3673 			 * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
3674 			 */
3675 			if (bn == -1) {
3676 				/* Don't multiply it by the block size */
3677 				l2p_struct.l2p_devoffset = bn;
3678 			} else {
3679 				l2p_struct.l2p_devoffset = bn * devBlockSize;
3680 				l2p_struct.l2p_devoffset += file_offset - offset;
3681 			}
3682 			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
3683 		}
3684 		goto outdrop;
3685 	}
3686 	case F_GETPATH:
3687 	case F_GETPATH_NOFIRMLINK: {
3688 		char *pathbufp;
3689 		int pathlen;
3690 
3691 		if (fp->f_type != DTYPE_VNODE) {
3692 			error = EBADF;
3693 			goto out;
3694 		}
3695 		vp = (struct vnode *)fp_get_data(fp);
3696 		proc_fdunlock(p);
3697 
3698 		pathlen = MAXPATHLEN;
3699 		pathbufp = zalloc(ZV_NAMEI);
3700 
3701 		if ((error = vnode_getwithref(vp)) == 0) {
3702 			if (cmd == F_GETPATH_NOFIRMLINK) {
3703 				error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
3704 			} else {
3705 				error = vn_getpath(vp, pathbufp, &pathlen);
3706 			}
3707 			(void)vnode_put(vp);
3708 
3709 			if (error == 0) {
3710 				error = copyout((caddr_t)pathbufp, argp, pathlen);
3711 			}
3712 		}
3713 		zfree(ZV_NAMEI, pathbufp);
3714 		goto outdrop;
3715 	}
3716 
3717 	case F_PATHPKG_CHECK: {
3718 		char *pathbufp;
3719 		size_t pathlen;
3720 
3721 		if (fp->f_type != DTYPE_VNODE) {
3722 			error = EBADF;
3723 			goto out;
3724 		}
3725 		vp = (struct vnode *)fp_get_data(fp);
3726 		proc_fdunlock(p);
3727 
3728 		pathlen = MAXPATHLEN;
3729 		pathbufp = zalloc(ZV_NAMEI);
3730 
3731 		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
3732 			if ((error = vnode_getwithref(vp)) == 0) {
3733 				AUDIT_ARG(text, pathbufp);
3734 				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
3735 
3736 				(void)vnode_put(vp);
3737 			}
3738 		}
3739 		zfree(ZV_NAMEI, pathbufp);
3740 		goto outdrop;
3741 	}
3742 
3743 	case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
3744 	case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
3745 	case F_BARRIERFSYNC:  // fsync + barrier
3746 	case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
3747 	case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
3748 		if (fp->f_type != DTYPE_VNODE) {
3749 			error = EBADF;
3750 			goto out;
3751 		}
3752 		vp = (struct vnode *)fp_get_data(fp);
3753 		proc_fdunlock(p);
3754 
3755 		if ((error = vnode_getwithref(vp)) == 0) {
3756 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
3757 
3758 			(void)vnode_put(vp);
3759 		}
3760 		break;
3761 	}
3762 
3763 	/*
3764 	 * SPI (private) for opening a file starting from a dir fd
3765 	 */
3766 	case F_OPENFROM: {
3767 		/* Check if this isn't a valid file descriptor */
3768 		if (fp->f_type != DTYPE_VNODE) {
3769 			error = EBADF;
3770 			goto out;
3771 		}
3772 		vp = (struct vnode *)fp_get_data(fp);
3773 
3774 		return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
3775 	}
3776 
3777 	/*
3778 	 * SPI (private) for unlinking a file starting from a dir fd
3779 	 */
3780 	case F_UNLINKFROM: {
3781 		user_addr_t pathname;
3782 
3783 		/* Check if this isn't a valid file descriptor */
3784 		if ((fp->f_type != DTYPE_VNODE) ||
3785 		    (fp->f_flag & FREAD) == 0) {
3786 			error = EBADF;
3787 			goto out;
3788 		}
3789 		vp = (struct vnode *)fp_get_data(fp);
3790 		proc_fdunlock(p);
3791 
3792 		if (vnode_getwithref(vp)) {
3793 			error = ENOENT;
3794 			goto outdrop;
3795 		}
3796 
3797 		/* Only valid for directories */
3798 		if (vp->v_type != VDIR) {
3799 			vnode_put(vp);
3800 			error = ENOTDIR;
3801 			goto outdrop;
3802 		}
3803 
3804 		/*
3805 		 * Only entitled apps may use the credentials of the thread
3806 		 * that opened the file descriptor.
3807 		 * Non-entitled threads will use their own context.
3808 		 */
3809 		if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
3810 			has_entitlement = 1;
3811 		}
3812 
3813 		/* Get flags, mode and pathname arguments. */
3814 		if (IS_64BIT_PROCESS(p)) {
3815 			pathname = (user_addr_t)argp;
3816 		} else {
3817 			pathname = CAST_USER_ADDR_T(argp);
3818 		}
3819 
3820 		/* Start the lookup relative to the file descriptor's vnode. */
3821 		error = unlink1(has_entitlement ? &context : vfs_context_current(),
3822 		    vp, pathname, UIO_USERSPACE, 0);
3823 
3824 		vnode_put(vp);
3825 		break;
3826 	}
3827 
3828 	case F_ADDSIGS:
3829 	case F_ADDFILESIGS:
3830 	case F_ADDFILESIGS_FOR_DYLD_SIM:
3831 	case F_ADDFILESIGS_RETURN:
3832 	case F_ADDFILESIGS_INFO:
3833 	{
3834 		struct cs_blob *blob = NULL;
3835 		struct user_fsignatures fs;
3836 		kern_return_t kr;
3837 		vm_offset_t kernel_blob_addr;
3838 		vm_size_t kernel_blob_size;
3839 		int blob_add_flags = 0;
3840 		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
3841 		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
3842 		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
3843 
3844 		if (fp->f_type != DTYPE_VNODE) {
3845 			error = EBADF;
3846 			goto out;
3847 		}
3848 		vp = (struct vnode *)fp_get_data(fp);
3849 		proc_fdunlock(p);
3850 
3851 		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3852 			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
3853 			if ((proc_getcsflags(p) & CS_KILL) == 0) {
3854 				proc_lock(p);
3855 				proc_csflags_set(p, CS_KILL);
3856 				proc_unlock(p);
3857 			}
3858 		}
3859 
3860 		error = vnode_getwithref(vp);
3861 		if (error) {
3862 			goto outdrop;
3863 		}
3864 
3865 		if (IS_64BIT_PROCESS(p)) {
3866 			error = copyin(argp, &fs, sizeof_fs);
3867 		} else {
3868 			if (cmd == F_ADDFILESIGS_INFO) {
3869 				error = EINVAL;
3870 				vnode_put(vp);
3871 				goto outdrop;
3872 			}
3873 
3874 			struct user32_fsignatures fs32;
3875 
3876 			error = copyin(argp, &fs32, sizeof(fs32));
3877 			fs.fs_file_start = fs32.fs_file_start;
3878 			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
3879 			fs.fs_blob_size = fs32.fs_blob_size;
3880 		}
3881 
3882 		if (error) {
3883 			vnode_put(vp);
3884 			goto outdrop;
3885 		}
3886 
3887 		/*
3888 		 * First check if we have something loaded a this offset
3889 		 */
3890 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
3891 		if (blob != NULL) {
3892 			/* If this is for dyld_sim revalidate the blob */
3893 			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3894 				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
3895 				if (error) {
3896 					blob = NULL;
3897 					if (error != EAGAIN) {
3898 						vnode_put(vp);
3899 						goto outdrop;
3900 					}
3901 				}
3902 			}
3903 		}
3904 
3905 		if (blob == NULL) {
3906 			/*
3907 			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
3908 			 * our use cases for the immediate future, but note that at the time of this commit, some
3909 			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
3910 			 *
3911 			 * We should consider how we can manage this more effectively; the above means that some
3912 			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
3913 			 * threshold considered ridiculous at the time of this change.
3914 			 */
3915 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
3916 			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
3917 				error = E2BIG;
3918 				vnode_put(vp);
3919 				goto outdrop;
3920 			}
3921 
3922 			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
3923 			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
3924 			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
3925 				error = ENOMEM;
3926 				vnode_put(vp);
3927 				goto outdrop;
3928 			}
3929 
3930 			if (cmd == F_ADDSIGS) {
3931 				error = copyin(fs.fs_blob_start,
3932 				    (void *) kernel_blob_addr,
3933 				    fs.fs_blob_size);
3934 			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
3935 				int resid;
3936 
3937 				error = vn_rdwr(UIO_READ,
3938 				    vp,
3939 				    (caddr_t) kernel_blob_addr,
3940 				    (int)kernel_blob_size,
3941 				    fs.fs_file_start + fs.fs_blob_start,
3942 				    UIO_SYSSPACE,
3943 				    0,
3944 				    kauth_cred_get(),
3945 				    &resid,
3946 				    p);
3947 				if ((error == 0) && resid) {
3948 					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
3949 					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
3950 				}
3951 			}
3952 
3953 			if (error) {
3954 				ubc_cs_blob_deallocate(kernel_blob_addr,
3955 				    kernel_blob_size);
3956 				vnode_put(vp);
3957 				goto outdrop;
3958 			}
3959 
3960 			blob = NULL;
3961 			error = ubc_cs_blob_add(vp,
3962 			    proc_platform(p),
3963 			    CPU_TYPE_ANY,                       /* not for a specific architecture */
3964 			    CPU_SUBTYPE_ANY,
3965 			    fs.fs_file_start,
3966 			    &kernel_blob_addr,
3967 			    kernel_blob_size,
3968 			    NULL,
3969 			    blob_add_flags,
3970 			    &blob);
3971 
3972 			/* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
3973 			if (error) {
3974 				if (kernel_blob_addr) {
3975 					ubc_cs_blob_deallocate(kernel_blob_addr,
3976 					    kernel_blob_size);
3977 				}
3978 				vnode_put(vp);
3979 				goto outdrop;
3980 			} else {
3981 #if CHECK_CS_VALIDATION_BITMAP
3982 				ubc_cs_validation_bitmap_allocate( vp );
3983 #endif
3984 			}
3985 		}
3986 
3987 		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
3988 		    cmd == F_ADDFILESIGS_INFO) {
3989 			/*
3990 			 * The first element of the structure is a
3991 			 * off_t that happen to have the same size for
3992 			 * all archs. Lets overwrite that.
3993 			 */
3994 			off_t end_offset = 0;
3995 			if (blob) {
3996 				end_offset = blob->csb_end_offset;
3997 			}
3998 			error = copyout(&end_offset, argp, sizeof(end_offset));
3999 
4000 			if (error) {
4001 				vnode_put(vp);
4002 				goto outdrop;
4003 			}
4004 		}
4005 
4006 		if (cmd == F_ADDFILESIGS_INFO) {
4007 			/* Return information. What we copy out depends on the size of the
4008 			 * passed in structure, to keep binary compatibility. */
4009 
4010 			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
4011 				// enough room for fs_cdhash[20]+fs_hash_type
4012 
4013 				if (blob != NULL) {
4014 					error = copyout(blob->csb_cdhash,
4015 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
4016 					    USER_FSIGNATURES_CDHASH_LEN);
4017 					if (error) {
4018 						vnode_put(vp);
4019 						goto outdrop;
4020 					}
4021 					int hashtype = cs_hash_type(blob->csb_hashtype);
4022 					error = copyout(&hashtype,
4023 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
4024 					    sizeof(int));
4025 					if (error) {
4026 						vnode_put(vp);
4027 						goto outdrop;
4028 					}
4029 				}
4030 			}
4031 		}
4032 
4033 		(void) vnode_put(vp);
4034 		break;
4035 	}
4036 #if CONFIG_SUPPLEMENTAL_SIGNATURES
4037 	case F_ADDFILESUPPL:
4038 	{
4039 		struct vnode *ivp;
4040 		struct cs_blob *blob = NULL;
4041 		struct user_fsupplement fs;
4042 		int orig_fd;
4043 		struct fileproc* orig_fp = NULL;
4044 		kern_return_t kr;
4045 		vm_offset_t kernel_blob_addr;
4046 		vm_size_t kernel_blob_size;
4047 
4048 		if (!IS_64BIT_PROCESS(p)) {
4049 			error = EINVAL;
4050 			goto out; // drop fp and unlock fds
4051 		}
4052 
4053 		if (fp->f_type != DTYPE_VNODE) {
4054 			error = EBADF;
4055 			goto out;
4056 		}
4057 
4058 		error = copyin(argp, &fs, sizeof(fs));
4059 		if (error) {
4060 			goto out;
4061 		}
4062 
4063 		orig_fd = fs.fs_orig_fd;
4064 		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
4065 			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
4066 			goto out;
4067 		}
4068 
4069 		if (orig_fp->f_type != DTYPE_VNODE) {
4070 			error = EBADF;
4071 			fp_drop(p, orig_fd, orig_fp, 1);
4072 			goto out;
4073 		}
4074 
4075 		ivp = (struct vnode *)fp_get_data(orig_fp);
4076 
4077 		vp = (struct vnode *)fp_get_data(fp);
4078 
4079 		proc_fdunlock(p);
4080 
4081 		error = vnode_getwithref(ivp);
4082 		if (error) {
4083 			fp_drop(p, orig_fd, orig_fp, 0);
4084 			goto outdrop; //drop fp
4085 		}
4086 
4087 		error = vnode_getwithref(vp);
4088 		if (error) {
4089 			vnode_put(ivp);
4090 			fp_drop(p, orig_fd, orig_fp, 0);
4091 			goto outdrop;
4092 		}
4093 
4094 		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
4095 			error = E2BIG;
4096 			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
4097 		}
4098 
4099 		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
4100 		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
4101 		if (kr != KERN_SUCCESS) {
4102 			error = ENOMEM;
4103 			goto dropboth;
4104 		}
4105 
4106 		int resid;
4107 		error = vn_rdwr(UIO_READ, vp,
4108 		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
4109 		    fs.fs_file_start + fs.fs_blob_start,
4110 		    UIO_SYSSPACE, 0,
4111 		    kauth_cred_get(), &resid, p);
4112 		if ((error == 0) && resid) {
4113 			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
4114 			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
4115 		}
4116 
4117 		if (error) {
4118 			ubc_cs_blob_deallocate(kernel_blob_addr,
4119 			    kernel_blob_size);
4120 			goto dropboth;
4121 		}
4122 
4123 		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
4124 		    &kernel_blob_addr, kernel_blob_size, &blob);
4125 
4126 		/* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
4127 		if (error) {
4128 			if (kernel_blob_addr) {
4129 				ubc_cs_blob_deallocate(kernel_blob_addr,
4130 				    kernel_blob_size);
4131 			}
4132 			goto dropboth;
4133 		}
4134 		vnode_put(ivp);
4135 		vnode_put(vp);
4136 		fp_drop(p, orig_fd, orig_fp, 0);
4137 		break;
4138 
4139 dropboth:
4140 		vnode_put(ivp);
4141 		vnode_put(vp);
4142 		fp_drop(p, orig_fd, orig_fp, 0);
4143 		goto outdrop;
4144 	}
4145 #endif
4146 	case F_GETCODEDIR:
4147 	case F_FINDSIGS: {
4148 		error = ENOTSUP;
4149 		goto out;
4150 	}
4151 	case F_CHECK_LV: {
4152 		struct fileglob *fg;
4153 		fchecklv_t lv = {};
4154 
4155 		if (fp->f_type != DTYPE_VNODE) {
4156 			error = EBADF;
4157 			goto out;
4158 		}
4159 		fg = fp->fp_glob;
4160 		proc_fdunlock(p);
4161 
4162 		if (IS_64BIT_PROCESS(p)) {
4163 			error = copyin(argp, &lv, sizeof(lv));
4164 		} else {
4165 			struct user32_fchecklv lv32 = {};
4166 
4167 			error = copyin(argp, &lv32, sizeof(lv32));
4168 			lv.lv_file_start = lv32.lv_file_start;
4169 			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
4170 			lv.lv_error_message_size = lv32.lv_error_message_size;
4171 		}
4172 		if (error) {
4173 			goto outdrop;
4174 		}
4175 
4176 #if CONFIG_MACF
4177 		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
4178 		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
4179 #endif
4180 
4181 		break;
4182 	}
4183 	case F_GETSIGSINFO: {
4184 		struct cs_blob *blob = NULL;
4185 		fgetsigsinfo_t sigsinfo = {};
4186 
4187 		if (fp->f_type != DTYPE_VNODE) {
4188 			error = EBADF;
4189 			goto out;
4190 		}
4191 		vp = (struct vnode *)fp_get_data(fp);
4192 		proc_fdunlock(p);
4193 
4194 		error = vnode_getwithref(vp);
4195 		if (error) {
4196 			goto outdrop;
4197 		}
4198 
4199 		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
4200 		if (error) {
4201 			vnode_put(vp);
4202 			goto outdrop;
4203 		}
4204 
4205 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
4206 		if (blob == NULL) {
4207 			error = ENOENT;
4208 			vnode_put(vp);
4209 			goto outdrop;
4210 		}
4211 		switch (sigsinfo.fg_info_request) {
4212 		case GETSIGSINFO_PLATFORM_BINARY:
4213 			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
4214 			error = copyout(&sigsinfo.fg_sig_is_platform,
4215 			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
4216 			    sizeof(sigsinfo.fg_sig_is_platform));
4217 			if (error) {
4218 				vnode_put(vp);
4219 				goto outdrop;
4220 			}
4221 			break;
4222 		default:
4223 			error = EINVAL;
4224 			vnode_put(vp);
4225 			goto outdrop;
4226 		}
4227 		vnode_put(vp);
4228 		break;
4229 	}
4230 #if CONFIG_PROTECT
4231 	case F_GETPROTECTIONCLASS: {
4232 		if (fp->f_type != DTYPE_VNODE) {
4233 			error = EBADF;
4234 			goto out;
4235 		}
4236 		vp = (struct vnode *)fp_get_data(fp);
4237 
4238 		proc_fdunlock(p);
4239 
4240 		if (vnode_getwithref(vp)) {
4241 			error = ENOENT;
4242 			goto outdrop;
4243 		}
4244 
4245 		struct vnode_attr va;
4246 
4247 		VATTR_INIT(&va);
4248 		VATTR_WANTED(&va, va_dataprotect_class);
4249 		error = VNOP_GETATTR(vp, &va, &context);
4250 		if (!error) {
4251 			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
4252 				*retval = va.va_dataprotect_class;
4253 			} else {
4254 				error = ENOTSUP;
4255 			}
4256 		}
4257 
4258 		vnode_put(vp);
4259 		break;
4260 	}
4261 
4262 	case F_SETPROTECTIONCLASS: {
4263 		/* tmp must be a valid PROTECTION_CLASS_* */
4264 		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4265 
4266 		if (fp->f_type != DTYPE_VNODE) {
4267 			error = EBADF;
4268 			goto out;
4269 		}
4270 		vp = (struct vnode *)fp_get_data(fp);
4271 
4272 		proc_fdunlock(p);
4273 
4274 		if (vnode_getwithref(vp)) {
4275 			error = ENOENT;
4276 			goto outdrop;
4277 		}
4278 
4279 		/* Only go forward if you have write access */
4280 		vfs_context_t ctx = vfs_context_current();
4281 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4282 			vnode_put(vp);
4283 			error = EBADF;
4284 			goto outdrop;
4285 		}
4286 
4287 		struct vnode_attr va;
4288 
4289 		VATTR_INIT(&va);
4290 		VATTR_SET(&va, va_dataprotect_class, tmp);
4291 
4292 		error = VNOP_SETATTR(vp, &va, ctx);
4293 
4294 		vnode_put(vp);
4295 		break;
4296 	}
4297 
4298 	case F_TRANSCODEKEY: {
4299 		if (fp->f_type != DTYPE_VNODE) {
4300 			error = EBADF;
4301 			goto out;
4302 		}
4303 
4304 		vp = (struct vnode *)fp_get_data(fp);
4305 		proc_fdunlock(p);
4306 
4307 		if (vnode_getwithref(vp)) {
4308 			error = ENOENT;
4309 			goto outdrop;
4310 		}
4311 
4312 		cp_key_t k = {
4313 			.len = CP_MAX_WRAPPEDKEYSIZE,
4314 		};
4315 
4316 		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
4317 		if (k.key == NULL) {
4318 			error = ENOMEM;
4319 		} else {
4320 			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
4321 		}
4322 
4323 		vnode_put(vp);
4324 
4325 		if (error == 0) {
4326 			error = copyout(k.key, argp, k.len);
4327 			*retval = k.len;
4328 		}
4329 		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);
4330 
4331 		break;
4332 	}
4333 
4334 	case F_GETPROTECTIONLEVEL:  {
4335 		if (fp->f_type != DTYPE_VNODE) {
4336 			error = EBADF;
4337 			goto out;
4338 		}
4339 
4340 		vp = (struct vnode*)fp_get_data(fp);
4341 		proc_fdunlock(p);
4342 
4343 		if (vnode_getwithref(vp)) {
4344 			error = ENOENT;
4345 			goto outdrop;
4346 		}
4347 
4348 		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
4349 
4350 		vnode_put(vp);
4351 		break;
4352 	}
4353 
4354 	case F_GETDEFAULTPROTLEVEL:  {
4355 		if (fp->f_type != DTYPE_VNODE) {
4356 			error = EBADF;
4357 			goto out;
4358 		}
4359 
4360 		vp = (struct vnode*)fp_get_data(fp);
4361 		proc_fdunlock(p);
4362 
4363 		if (vnode_getwithref(vp)) {
4364 			error = ENOENT;
4365 			goto outdrop;
4366 		}
4367 
4368 		/*
4369 		 * if cp_get_major_vers fails, error will be set to proper errno
4370 		 * and cp_version will still be 0.
4371 		 */
4372 
4373 		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
4374 
4375 		vnode_put(vp);
4376 		break;
4377 	}
4378 
4379 #endif /* CONFIG_PROTECT */
4380 
4381 	case F_MOVEDATAEXTENTS: {
4382 		struct fileproc *fp2 = NULL;
4383 		struct vnode *src_vp = NULLVP;
4384 		struct vnode *dst_vp = NULLVP;
		/* We need to grab the 2nd FD out of the arguments before moving on. */
4386 		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
4387 
4388 		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
4389 		if (error) {
4390 			goto out;
4391 		}
4392 
4393 		if (fp->f_type != DTYPE_VNODE) {
4394 			error = EBADF;
4395 			goto out;
4396 		}
4397 
4398 		/*
4399 		 * For now, special case HFS+ and APFS only, since this
4400 		 * is SPI.
4401 		 */
4402 		src_vp = (struct vnode *)fp_get_data(fp);
4403 		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
4404 			error = ENOTSUP;
4405 			goto out;
4406 		}
4407 
4408 		/*
4409 		 * Get the references before we start acquiring iocounts on the vnodes,
4410 		 * while we still hold the proc fd lock
4411 		 */
4412 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4413 			error = EBADF;
4414 			goto out;
4415 		}
4416 		if (fp2->f_type != DTYPE_VNODE) {
4417 			fp_drop(p, fd2, fp2, 1);
4418 			error = EBADF;
4419 			goto out;
4420 		}
4421 		dst_vp = (struct vnode *)fp_get_data(fp2);
4422 		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
4423 			fp_drop(p, fd2, fp2, 1);
4424 			error = ENOTSUP;
4425 			goto out;
4426 		}
4427 
4428 #if CONFIG_MACF
4429 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4430 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4431 		if (error) {
4432 			fp_drop(p, fd2, fp2, 1);
4433 			goto out;
4434 		}
4435 #endif
4436 		/* Audit the 2nd FD */
4437 		AUDIT_ARG(fd, fd2);
4438 
4439 		proc_fdunlock(p);
4440 
4441 		if (vnode_getwithref(src_vp)) {
4442 			fp_drop(p, fd2, fp2, 0);
4443 			error = ENOENT;
4444 			goto outdrop;
4445 		}
4446 		if (vnode_getwithref(dst_vp)) {
4447 			vnode_put(src_vp);
4448 			fp_drop(p, fd2, fp2, 0);
4449 			error = ENOENT;
4450 			goto outdrop;
4451 		}
4452 
4453 		/*
4454 		 * Basic asserts; validate they are not the same and that
4455 		 * both live on the same filesystem.
4456 		 */
4457 		if (dst_vp == src_vp) {
4458 			vnode_put(src_vp);
4459 			vnode_put(dst_vp);
4460 			fp_drop(p, fd2, fp2, 0);
4461 			error = EINVAL;
4462 			goto outdrop;
4463 		}
4464 
4465 		if (dst_vp->v_mount != src_vp->v_mount) {
4466 			vnode_put(src_vp);
4467 			vnode_put(dst_vp);
4468 			fp_drop(p, fd2, fp2, 0);
4469 			error = EXDEV;
4470 			goto outdrop;
4471 		}
4472 
4473 		/* Now we have a legit pair of FDs.  Go to work */
4474 
4475 		/* Now check for write access to the target files */
4476 		if (vnode_authorize(src_vp, NULLVP,
4477 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4478 			vnode_put(src_vp);
4479 			vnode_put(dst_vp);
4480 			fp_drop(p, fd2, fp2, 0);
4481 			error = EBADF;
4482 			goto outdrop;
4483 		}
4484 
4485 		if (vnode_authorize(dst_vp, NULLVP,
4486 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4487 			vnode_put(src_vp);
4488 			vnode_put(dst_vp);
4489 			fp_drop(p, fd2, fp2, 0);
4490 			error = EBADF;
4491 			goto outdrop;
4492 		}
4493 
4494 		/* Verify that both vps point to files and not directories */
4495 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4496 			error = EINVAL;
4497 			vnode_put(src_vp);
4498 			vnode_put(dst_vp);
4499 			fp_drop(p, fd2, fp2, 0);
4500 			goto outdrop;
4501 		}
4502 
4503 		/*
4504 		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
4505 		 * We'll pass in our special bit indicating that the new behavior is expected
4506 		 */
4507 
4508 		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
4509 
4510 		vnode_put(src_vp);
4511 		vnode_put(dst_vp);
4512 		fp_drop(p, fd2, fp2, 0);
4513 		break;
4514 	}
4515 
4516 	/*
4517 	 * SPI for making a file compressed.
4518 	 */
4519 	case F_MAKECOMPRESSED: {
4520 		uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4521 
4522 		if (fp->f_type != DTYPE_VNODE) {
4523 			error = EBADF;
4524 			goto out;
4525 		}
4526 
4527 		vp = (struct vnode*)fp_get_data(fp);
4528 		proc_fdunlock(p);
4529 
4530 		/* get the vnode */
4531 		if (vnode_getwithref(vp)) {
4532 			error = ENOENT;
4533 			goto outdrop;
4534 		}
4535 
4536 		/* Is it a file? */
4537 		if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
4538 			vnode_put(vp);
4539 			error = EBADF;
4540 			goto outdrop;
4541 		}
4542 
4543 		/* invoke ioctl to pass off to FS */
4544 		/* Only go forward if you have write access */
4545 		vfs_context_t ctx = vfs_context_current();
4546 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4547 			vnode_put(vp);
4548 			error = EBADF;
4549 			goto outdrop;
4550 		}
4551 
4552 		error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
4553 
4554 		vnode_put(vp);
4555 		break;
4556 	}
4557 
4558 	/*
4559 	 * SPI (private) for indicating to a filesystem that subsequent writes to
	 * the open FD will be written to the Fastflow.
4561 	 */
4562 	case F_SET_GREEDY_MODE:
4563 	/* intentionally drop through to the same handler as F_SETSTATIC.
4564 	 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4565 	 */
4566 
4567 	/*
4568 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4569 	 * the open FD will represent static content.
4570 	 */
4571 	case F_SETSTATICCONTENT: {
4572 		caddr_t ioctl_arg = NULL;
4573 
4574 		if (uap->arg) {
4575 			ioctl_arg = (caddr_t) 1;
4576 		}
4577 
4578 		if (fp->f_type != DTYPE_VNODE) {
4579 			error = EBADF;
4580 			goto out;
4581 		}
4582 		vp = (struct vnode *)fp_get_data(fp);
4583 		proc_fdunlock(p);
4584 
4585 		error = vnode_getwithref(vp);
4586 		if (error) {
4587 			error = ENOENT;
4588 			goto outdrop;
4589 		}
4590 
4591 		/* Only go forward if you have write access */
4592 		vfs_context_t ctx = vfs_context_current();
4593 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4594 			vnode_put(vp);
4595 			error = EBADF;
4596 			goto outdrop;
4597 		}
4598 
4599 		error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
4600 		(void)vnode_put(vp);
4601 
4602 		break;
4603 	}
4604 
4605 	/*
4606 	 * SPI (private) for indicating to the lower level storage driver that the
4607 	 * subsequent writes should be of a particular IO type (burst, greedy, static),
4608 	 * or other flavors that may be necessary.
4609 	 */
4610 	case F_SETIOTYPE: {
4611 		caddr_t param_ptr;
4612 		uint32_t param;
4613 
4614 		if (uap->arg) {
4615 			/* extract 32 bits of flags from userland */
4616 			param_ptr = (caddr_t) uap->arg;
4617 			param = (uint32_t) param_ptr;
4618 		} else {
4619 			/* If no argument is specified, error out */
4620 			error = EINVAL;
4621 			goto out;
4622 		}
4623 
4624 		/*
4625 		 * Validate the different types of flags that can be specified:
4626 		 * all of them are mutually exclusive for now.
4627 		 */
4628 		switch (param) {
4629 		case F_IOTYPE_ISOCHRONOUS:
4630 			break;
4631 
4632 		default:
4633 			error = EINVAL;
4634 			goto out;
4635 		}
4636 
4637 
4638 		if (fp->f_type != DTYPE_VNODE) {
4639 			error = EBADF;
4640 			goto out;
4641 		}
4642 		vp = (struct vnode *)fp_get_data(fp);
4643 		proc_fdunlock(p);
4644 
4645 		error = vnode_getwithref(vp);
4646 		if (error) {
4647 			error = ENOENT;
4648 			goto outdrop;
4649 		}
4650 
4651 		/* Only go forward if you have write access */
4652 		vfs_context_t ctx = vfs_context_current();
4653 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4654 			vnode_put(vp);
4655 			error = EBADF;
4656 			goto outdrop;
4657 		}
4658 
4659 		error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
4660 		(void)vnode_put(vp);
4661 
4662 		break;
4663 	}
4664 
4665 	/*
4666 	 * Set the vnode pointed to by 'fd'
4667 	 * and tag it as the (potentially future) backing store
4668 	 * for another filesystem
4669 	 */
4670 	case F_SETBACKINGSTORE: {
4671 		if (fp->f_type != DTYPE_VNODE) {
4672 			error = EBADF;
4673 			goto out;
4674 		}
4675 
4676 		vp = (struct vnode *)fp_get_data(fp);
4677 
4678 		if (vp->v_tag != VT_HFS) {
4679 			error = EINVAL;
4680 			goto out;
4681 		}
4682 		proc_fdunlock(p);
4683 
4684 		if (vnode_getwithref(vp)) {
4685 			error = ENOENT;
4686 			goto outdrop;
4687 		}
4688 
4689 		/* only proceed if you have write access */
4690 		vfs_context_t ctx = vfs_context_current();
4691 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4692 			vnode_put(vp);
4693 			error = EBADF;
4694 			goto outdrop;
4695 		}
4696 
4697 
4698 		/* If arg != 0, set, otherwise unset */
4699 		if (uap->arg) {
4700 			error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
4701 		} else {
4702 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
4703 		}
4704 
4705 		vnode_put(vp);
4706 		break;
4707 	}
4708 
4709 	/*
4710 	 * like F_GETPATH, but special semantics for
4711 	 * the mobile time machine handler.
4712 	 */
4713 	case F_GETPATH_MTMINFO: {
4714 		char *pathbufp;
4715 		int pathlen;
4716 
4717 		if (fp->f_type != DTYPE_VNODE) {
4718 			error = EBADF;
4719 			goto out;
4720 		}
4721 		vp = (struct vnode *)fp_get_data(fp);
4722 		proc_fdunlock(p);
4723 
4724 		pathlen = MAXPATHLEN;
4725 		pathbufp = zalloc(ZV_NAMEI);
4726 
4727 		if ((error = vnode_getwithref(vp)) == 0) {
4728 			int backingstore = 0;
4729 
4730 			/* Check for error from vn_getpath before moving on */
4731 			if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4732 				if (vp->v_tag == VT_HFS) {
4733 					error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4734 				}
4735 				(void)vnode_put(vp);
4736 
4737 				if (error == 0) {
4738 					error = copyout((caddr_t)pathbufp, argp, pathlen);
4739 				}
4740 				if (error == 0) {
4741 					/*
4742 					 * If the copyout was successful, now check to ensure
4743 					 * that this vnode is not a BACKINGSTORE vnode.  mtmd
4744 					 * wants the path regardless.
4745 					 */
4746 					if (backingstore) {
4747 						error = EBUSY;
4748 					}
4749 				}
4750 			} else {
4751 				(void)vnode_put(vp);
4752 			}
4753 		}
4754 
4755 		zfree(ZV_NAMEI, pathbufp);
4756 		goto outdrop;
4757 	}
4758 
4759 	case F_RECYCLE: {
4760 #if !DEBUG && !DEVELOPMENT
4761 		bool allowed = false;
4762 
4763 		//
4764 		// non-debug and non-development kernels have restrictions
		// on who can call this fcntl.  the process has to be marked
4766 		// with the dataless-manipulator entitlement and either the
4767 		// process or thread have to be marked rapid-aging.
4768 		//
4769 		if (!vfs_context_is_dataless_manipulator(&context)) {
4770 			error = EPERM;
4771 			goto out;
4772 		}
4773 
4774 		proc_t proc = vfs_context_proc(&context);
4775 		if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4776 			allowed = true;
4777 		} else {
4778 			thread_t thr = vfs_context_thread(&context);
4779 			if (thr) {
4780 				struct uthread *ut = get_bsdthread_info(thr);
4781 
4782 				if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4783 					allowed = true;
4784 				}
4785 			}
4786 		}
4787 		if (!allowed) {
4788 			error = EPERM;
4789 			goto out;
4790 		}
4791 #endif
4792 
4793 		if (fp->f_type != DTYPE_VNODE) {
4794 			error = EBADF;
4795 			goto out;
4796 		}
4797 		vp = (struct vnode *)fp_get_data(fp);
4798 		proc_fdunlock(p);
4799 
4800 		vnode_recycle(vp);
4801 		break;
4802 	}
4803 
4804 	default:
4805 		/*
		 * This is an fcntl() that we do not recognize at this level;
4807 		 * if this is a vnode, we send it down into the VNOP_IOCTL
4808 		 * for this vnode; this can include special devices, and will
4809 		 * effectively overload fcntl() to send ioctl()'s.
4810 		 */
4811 		if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
4812 			error = EINVAL;
4813 			goto out;
4814 		}
4815 
4816 		/* Catch any now-invalid fcntl() selectors */
4817 		switch (cmd) {
4818 		case (int)APFSIOC_REVERT_TO_SNAPSHOT:
4819 		case (int)FSIOC_FIOSEEKHOLE:
4820 		case (int)FSIOC_FIOSEEKDATA:
4821 		case (int)FSIOC_CAS_BSDFLAGS:
4822 		case HFS_GET_BOOT_INFO:
4823 		case HFS_SET_BOOT_INFO:
4824 		case FIOPINSWAP:
4825 		case F_MARKDEPENDENCY:
4826 		case TIOCREVOKE:
4827 		case TIOCREVOKECLEAR:
4828 			error = EINVAL;
4829 			goto out;
4830 		default:
4831 			break;
4832 		}
4833 
4834 		if (fp->f_type != DTYPE_VNODE) {
4835 			error = EBADF;
4836 			goto out;
4837 		}
4838 		vp = (struct vnode *)fp_get_data(fp);
4839 		proc_fdunlock(p);
4840 
4841 		if ((error = vnode_getwithref(vp)) == 0) {
4842 #define STK_PARAMS 128
4843 			char stkbuf[STK_PARAMS] = {0};
4844 			unsigned int size;
4845 			caddr_t data, memp;
4846 			/*
4847 			 * For this to work properly, we have to copy in the
4848 			 * ioctl() cmd argument if there is one; we must also
4849 			 * check that a command parameter, if present, does
4850 			 * not exceed the maximum command length dictated by
4851 			 * the number of bits we have available in the command
4852 			 * to represent a structure length.  Finally, we have
4853 			 * to copy the results back out, if it is that type of
4854 			 * ioctl().
4855 			 */
4856 			size = IOCPARM_LEN(cmd);
4857 			if (size > IOCPARM_MAX) {
4858 				(void)vnode_put(vp);
4859 				error = EINVAL;
4860 				break;
4861 			}
4862 
4863 			memp = NULL;
4864 			if (size > sizeof(stkbuf)) {
4865 				memp = (caddr_t)kalloc_data(size, Z_WAITOK);
4866 				if (memp == 0) {
4867 					(void)vnode_put(vp);
4868 					error = ENOMEM;
4869 					goto outdrop;
4870 				}
4871 				data = memp;
4872 			} else {
4873 				data = &stkbuf[0];
4874 			}
4875 
4876 			if (cmd & IOC_IN) {
4877 				if (size) {
4878 					/* structure */
4879 					error = copyin(argp, data, size);
4880 					if (error) {
4881 						(void)vnode_put(vp);
4882 						if (memp) {
4883 							kfree_data(memp, size);
4884 						}
4885 						goto outdrop;
4886 					}
4887 
4888 					/* Bzero the section beyond that which was needed */
4889 					if (size <= sizeof(stkbuf)) {
4890 						bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
4891 					}
4892 				} else {
4893 					/* int */
4894 					if (is64bit) {
4895 						*(user_addr_t *)data = argp;
4896 					} else {
4897 						*(uint32_t *)data = (uint32_t)argp;
4898 					}
4899 				};
4900 			} else if ((cmd & IOC_OUT) && size) {
4901 				/*
4902 				 * Zero the buffer so the user always
4903 				 * gets back something deterministic.
4904 				 */
4905 				bzero(data, size);
4906 			} else if (cmd & IOC_VOID) {
4907 				if (is64bit) {
4908 					*(user_addr_t *)data = argp;
4909 				} else {
4910 					*(uint32_t *)data = (uint32_t)argp;
4911 				}
4912 			}
4913 
4914 			error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
4915 
4916 			(void)vnode_put(vp);
4917 
4918 			/* Copy any output data to user */
4919 			if (error == 0 && (cmd & IOC_OUT) && size) {
4920 				error = copyout(data, argp, size);
4921 			}
4922 			if (memp) {
4923 				kfree_data(memp, size);
4924 			}
4925 		}
4926 		break;
4927 	}
4928 
4929 outdrop:
4930 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
4931 
4932 out:
4933 	return sys_fcntl_out(p, fd, fp, error);
4934 }
4935 
4936 
4937 /*
4938  * sys_close
4939  *
4940  * Description:	The implementation of the close(2) system call
4941  *
4942  * Parameters:	p			Process in whose per process file table
4943  *					the close is to occur
4944  *		uap->fd			fd to be closed
4945  *		retval			<unused>
4946  *
4947  * Returns:	0			Success
4948  *	fp_lookup:EBADF			Bad file descriptor
4949  *      fp_guard_exception:???          Guarded file descriptor
4950  *	close_internal:EBADF
4951  *	close_internal:???              Anything returnable by a per-fileops
4952  *					close function
4953  */
4954 int
sys_close(proc_t p,struct close_args * uap,__unused int32_t * retval)4955 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
4956 {
4957 	__pthread_testcancel(1);
4958 	return close_nocancel(p, uap->fd);
4959 }
4960 
4961 int
sys_close_nocancel(proc_t p,struct close_nocancel_args * uap,__unused int32_t * retval)4962 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
4963 {
4964 	return close_nocancel(p, uap->fd);
4965 }
4966 
4967 int
close_nocancel(proc_t p,int fd)4968 close_nocancel(proc_t p, int fd)
4969 {
4970 	struct fileproc *fp;
4971 
4972 	AUDIT_SYSCLOSE(p, fd);
4973 
4974 	proc_fdlock(p);
4975 	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
4976 		proc_fdunlock(p);
4977 		return EBADF;
4978 	}
4979 
4980 	if (fp_isguarded(fp, GUARD_CLOSE)) {
4981 		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
4982 		proc_fdunlock(p);
4983 		return error;
4984 	}
4985 
4986 	return fp_close_and_unlock(p, fd, fp, 0);
4987 }
4988 
4989 
4990 /*
4991  * fstat
4992  *
4993  * Description:	Return status information about a file descriptor.
4994  *
4995  * Parameters:	p				The process doing the fstat
4996  *		fd				The fd to stat
4997  *		ub				The user stat buffer
4998  *		xsecurity			The user extended security
4999  *						buffer, or 0 if none
5000  *		xsecurity_size			The size of xsecurity, or 0
5001  *						if no xsecurity
5002  *		isstat64			Flag to indicate 64 bit version
5003  *						for inode size, etc.
5004  *
5005  * Returns:	0				Success
5006  *		EBADF
5007  *		EFAULT
5008  *	fp_lookup:EBADF				Bad file descriptor
5009  *	vnode_getwithref:???
5010  *	copyout:EFAULT
5011  *	vnode_getwithref:???
5012  *	vn_stat:???
5013  *	soo_stat:???
5014  *	pipe_stat:???
5015  *	pshm_stat:???
5016  *	kqueue_stat:???
5017  *
5018  * Notes:	Internal implementation for all other fstat() related
5019  *		functions
5020  *
5021  *		XXX switch on node type is bogus; need a stat in struct
5022  *		XXX fileops instead.
5023  */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
    user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* Kernel-side stat result; 32- and 64-bit-inode variants share storage. */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* Userland-ABI copy of the result, filled by the munge_* helpers. */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	/* Take a reference on the fileproc; dropped at "out". */
	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = (caddr_t)fp_get_data(fp);
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/* Dispatch on descriptor type to fill "source" (see XXX note above). */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
				    fp->fp_glob->fg_cred);
			} else {
				/* Full path: also fetches the ACL into fsec. */
				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put((vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		/* Scrub the spare fields, then convert to the caller's ABI layout. */
		if (isstat64 != 0) {
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat(&source.sb, &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(&source.sb, &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			/* none present: report a zero size back to the caller */
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/*
			 * if the caller supplied enough room, copy out to it;
			 * otherwise only the required size was reported above.
			 */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	fp_drop(p, fd, fp, 0);
	/* Free any ACL blob fetched by vn_stat() (fsec stays NULL otherwise). */
	if (fsec != NULL) {
		kauth_filesec_free(fsec);
	}
	return error;
}
5167 
5168 
5169 /*
5170  * sys_fstat_extended
5171  *
5172  * Description:	Extended version of fstat supporting returning extended
5173  *		security information
5174  *
5175  * Parameters:	p				The process doing the fstat
5176  *		uap->fd				The fd to stat
5177  *		uap->ub				The user stat buffer
5178  *		uap->xsecurity			The user extended security
5179  *						buffer, or 0 if none
5180  *		uap->xsecurity_size		The size of xsecurity, or 0
5181  *
5182  * Returns:	0				Success
5183  *		!0				Errno (see fstat)
5184  */
5185 int
sys_fstat_extended(proc_t p,struct fstat_extended_args * uap,__unused int32_t * retval)5186 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
5187 {
5188 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
5189 }
5190 
5191 
5192 /*
5193  * sys_fstat
5194  *
5195  * Description:	Get file status for the file associated with fd
5196  *
5197  * Parameters:	p				The process doing the fstat
5198  *		uap->fd				The fd to stat
5199  *		uap->ub				The user stat buffer
5200  *
5201  * Returns:	0				Success
5202  *		!0				Errno (see fstat)
5203  */
5204 int
sys_fstat(proc_t p,struct fstat_args * uap,__unused int32_t * retval)5205 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
5206 {
5207 	return fstat(p, uap->fd, uap->ub, 0, 0, 0);
5208 }
5209 
5210 
5211 /*
5212  * sys_fstat64_extended
5213  *
5214  * Description:	Extended version of fstat64 supporting returning extended
5215  *		security information
5216  *
5217  * Parameters:	p				The process doing the fstat
5218  *		uap->fd				The fd to stat
5219  *		uap->ub				The user stat buffer
5220  *		uap->xsecurity			The user extended security
5221  *						buffer, or 0 if none
5222  *		uap->xsecurity_size		The size of xsecurity, or 0
5223  *
5224  * Returns:	0				Success
5225  *		!0				Errno (see fstat)
5226  */
5227 int
sys_fstat64_extended(proc_t p,struct fstat64_extended_args * uap,__unused int32_t * retval)5228 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
5229 {
5230 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
5231 }
5232 
5233 
5234 /*
5235  * sys_fstat64
5236  *
5237  * Description:	Get 64 bit version of the file status for the file associated
5238  *		with fd
5239  *
5240  * Parameters:	p				The process doing the fstat
5241  *		uap->fd				The fd to stat
5242  *		uap->ub				The user stat buffer
5243  *
5244  * Returns:	0				Success
5245  *		!0				Errno (see fstat)
5246  */
5247 int
sys_fstat64(proc_t p,struct fstat64_args * uap,__unused int32_t * retval)5248 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
5249 {
5250 	return fstat(p, uap->fd, uap->ub, 0, 0, 1);
5251 }
5252 
5253 
5254 /*
5255  * sys_fpathconf
5256  *
5257  * Description:	Return pathconf information about a file descriptor.
5258  *
5259  * Parameters:	p				Process making the request
5260  *		uap->fd				fd to get information about
5261  *		uap->name			Name of information desired
5262  *		retval				Pointer to the call return area
5263  *
5264  * Returns:	0				Success
5265  *		EINVAL
5266  *	fp_lookup:EBADF				Bad file descriptor
5267  *	vnode_getwithref:???
5268  *	vn_pathconf:???
5269  *
5270  * Implicit returns:
5271  *		*retval (modified)		Returned information (numeric)
5272  */
5273 int
sys_fpathconf(proc_t p,struct fpathconf_args * uap,int32_t * retval)5274 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5275 {
5276 	int fd = uap->fd;
5277 	struct fileproc *fp;
5278 	struct vnode *vp;
5279 	int error = 0;
5280 	file_type_t type;
5281 
5282 
5283 	AUDIT_ARG(fd, uap->fd);
5284 	if ((error = fp_lookup(p, fd, &fp, 0))) {
5285 		return error;
5286 	}
5287 	type = fp->f_type;
5288 
5289 	switch (type) {
5290 	case DTYPE_SOCKET:
5291 		if (uap->name != _PC_PIPE_BUF) {
5292 			error = EINVAL;
5293 			goto out;
5294 		}
5295 		*retval = PIPE_BUF;
5296 		error = 0;
5297 		goto out;
5298 
5299 	case DTYPE_PIPE:
5300 		if (uap->name != _PC_PIPE_BUF) {
5301 			error = EINVAL;
5302 			goto out;
5303 		}
5304 		*retval = PIPE_BUF;
5305 		error = 0;
5306 		goto out;
5307 
5308 	case DTYPE_VNODE:
5309 		vp = (struct vnode *)fp_get_data(fp);
5310 
5311 		if ((error = vnode_getwithref(vp)) == 0) {
5312 			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5313 
5314 			error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5315 
5316 			(void)vnode_put(vp);
5317 		}
5318 		goto out;
5319 
5320 	default:
5321 		error = EINVAL;
5322 		goto out;
5323 	}
5324 	/*NOTREACHED*/
5325 out:
5326 	fp_drop(p, fd, fp, 0);
5327 	return error;
5328 }
5329 
5330 /*
5331  * sys_flock
5332  *
5333  * Description:	Apply an advisory lock on a file descriptor.
5334  *
5335  * Parameters:	p				Process making request
5336  *		uap->fd				fd on which the lock is to be
5337  *						attempted
5338  *		uap->how			(Un)Lock bits, including type
5339  *		retval				Pointer to the call return area
5340  *
5341  * Returns:	0				Success
5342  *	fp_getfvp:EBADF				Bad file descriptor
5343  *	fp_getfvp:ENOTSUP			fd does not refer to a vnode
5344  *	vnode_getwithref:???
5345  *	VNOP_ADVLOCK:???
5346  *
5347  * Implicit returns:
5348  *		*retval (modified)		Size of dtable
5349  *
5350  * Notes:	Just attempt to get a record lock of the requested type on
5351  *		the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5352  */
int
sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
{
	int fd = uap->fd;
	int how = uap->how;
	struct fileproc *fp;
	struct vnode *vp;
	struct flock lf;
	vfs_context_t ctx = vfs_context_current();
	int error = 0;

	AUDIT_ARG(fd, uap->fd);
	/* Only vnode-backed descriptors can be flock()ed. */
	if ((error = fp_getfvp(p, fd, &fp, &vp))) {
		return error;
	}
	if ((error = vnode_getwithref(vp))) {
		goto out1;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* Lock the whole file: SEEK_SET, start 0, len 0 means "entire file". */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	/* Unlock takes priority over any lock bits also present in "how". */
	if (how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
		goto out;
	}
	if (how & LOCK_EX) {
		lf.l_type = F_WRLCK;
	} else if (how & LOCK_SH) {
		lf.l_type = F_RDLCK;
	} else {
		/* Neither exclusive nor shared requested. */
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF
	error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
	if (error) {
		goto out;
	}
#endif
	/*
	 * The lock owner is the fileglob (F_FLOCK semantics); LOCK_NB
	 * suppresses F_WAIT so the call fails rather than blocks.
	 */
	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
	    ctx, NULL);
	if (!error) {
		/*
		 * Record that this fileglob holds an advisory lock —
		 * presumably consumed when the descriptor is closed (TODO:
		 * confirm against the close path).
		 */
		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
	}
out:
	(void)vnode_put(vp);
out1:
	fp_drop(p, fd, fp, 0);
	return error;
}
5407 
5408 /*
5409  * sys_fileport_makeport
5410  *
5411  * Description: Obtain a Mach send right for a given file descriptor.
5412  *
5413  * Parameters:	p		Process calling fileport
5414  *              uap->fd		The fd to reference
5415  *              uap->portnamep  User address at which to place port name.
5416  *
5417  * Returns:	0		Success.
5418  *              EBADF		Bad file descriptor.
5419  *              EINVAL		File descriptor had type that cannot be sent, misc. other errors.
5420  *              EFAULT		Address at which to store port name is not valid.
5421  *              EAGAIN		Resource shortage.
5422  *
5423  * Implicit returns:
5424  *		On success, name of send right is stored at user-specified address.
5425  */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	/* Look up the descriptor while holding the proc fd lock. */
	proc_fdlock(p);
	err = fp_lookup(p, fd, &fp, 1);
	if (err != 0) {
		goto out_unlock;
	}

	/* Some fileglob types cannot be transported over a Mach port. */
	fg = fp->fp_glob;
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	/* Guarded descriptors deliver an exception instead of a port. */
	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		/* Port allocation failed: give back the fileglob reference. */
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry.  Deallocates port on failure. */
	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	/* Hand the port name back to userland. */
	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(&fg->fg_lock);

	fp_drop(p, fd, fp, 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, 0);
	}

	return err;
}
5503 
void
fileport_releasefg(struct fileglob *fg)
{
	/* Drop the fileglob reference held on behalf of a fileport; there is
	 * no owning process, hence PROC_NULL. */
	(void)fg_drop(PROC_NULL, fg);
}
5509 
5510 /*
5511  * fileport_makefd
5512  *
5513  * Description: Obtain the file descriptor for a given Mach send right.
5514  *
5515  * Returns:	0		Success
5516  *		EINVAL		Invalid Mach port name, or port is not for a file.
5517  *	fdalloc:EMFILE
5518  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5519  *
5520  * Implicit returns:
5521  *		*retval (modified)		The new descriptor
5522  */
int
fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
{
	struct fileglob *fg;
	struct fileproc *fp = FILEPROC_NULL;
	int fd;
	int err;

	/* Reject ports that do not carry a fileglob (not fileports). */
	fg = fileport_port_to_fileglob(port);
	if (fg == NULL) {
		err = EINVAL;
		goto out;
	}

	fp = fileproc_alloc_init();

	proc_fdlock(p);
	/* Reserve a descriptor slot; lowest available at or above 0. */
	err = fdalloc(p, 0, &fd);
	if (err != 0) {
		proc_fdunlock(p);
		goto out;
	}
	if (fp_flags) {
		fp->fp_flags |= fp_flags;
	}

	/* Take a fileglob reference for the new descriptor before publishing. */
	fp->fp_glob = fg;
	fg_ref(p, fg);

	/* Install fp in the reserved slot, making the fd visible. */
	procfdtbl_releasefd(p, fd, fp);
	proc_fdunlock(p);

	*retval = fd;
	err = 0;
out:
	/* On any failure after allocation, the unpublished fileproc is freed. */
	if ((fp != NULL) && (0 != err)) {
		fileproc_free(fp);
	}

	return err;
}
5564 
5565 /*
5566  * sys_fileport_makefd
5567  *
5568  * Description: Obtain the file descriptor for a given Mach send right.
5569  *
5570  * Parameters:	p		Process calling fileport
5571  *              uap->port	Name of send right to file port.
5572  *
5573  * Returns:	0		Success
5574  *		EINVAL		Invalid Mach port name, or port is not for a file.
5575  *	fdalloc:EMFILE
5576  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5577  *
5578  * Implicit returns:
5579  *		*retval (modified)		The new descriptor
5580  */
5581 int
sys_fileport_makefd(proc_t p,struct fileport_makefd_args * uap,int32_t * retval)5582 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5583 {
5584 	ipc_port_t port = IPC_PORT_NULL;
5585 	mach_port_name_t send = uap->port;
5586 	kern_return_t res;
5587 	int err;
5588 
5589 	res = ipc_object_copyin(get_task_ipcspace(p->task),
5590 	    send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5591 
5592 	if (res == KERN_SUCCESS) {
5593 		err = fileport_makefd(p, port, FP_CLOEXEC, retval);
5594 	} else {
5595 		err = EINVAL;
5596 	}
5597 
5598 	if (IPC_PORT_NULL != port) {
5599 		ipc_port_release_send(port);
5600 	}
5601 
5602 	return err;
5603 }
5604 
5605 
5606 #pragma mark fileops wrappers
5607 
5608 /*
5609  * fo_read
5610  *
5611  * Description:	Generic fileops read indirected through the fileops pointer
5612  *		in the fileproc structure
5613  *
5614  * Parameters:	fp				fileproc structure pointer
5615  *		uio				user I/O structure pointer
5616  *		flags				FOF_ flags
5617  *		ctx				VFS context for operation
5618  *
5619  * Returns:	0				Success
5620  *		!0				Errno from read
5621  */
5622 int
fo_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5623 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5624 {
5625 	return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5626 }
5627 
5628 int
fo_no_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5629 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5630 {
5631 #pragma unused(fp, uio, flags, ctx)
5632 	return ENXIO;
5633 }
5634 
5635 
5636 /*
5637  * fo_write
5638  *
5639  * Description:	Generic fileops write indirected through the fileops pointer
5640  *		in the fileproc structure
5641  *
5642  * Parameters:	fp				fileproc structure pointer
5643  *		uio				user I/O structure pointer
5644  *		flags				FOF_ flags
5645  *		ctx				VFS context for operation
5646  *
5647  * Returns:	0				Success
5648  *		!0				Errno from write
5649  */
5650 int
fo_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5651 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5652 {
5653 	return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5654 }
5655 
5656 int
fo_no_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5657 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5658 {
5659 #pragma unused(fp, uio, flags, ctx)
5660 	return ENXIO;
5661 }
5662 
5663 
5664 /*
5665  * fo_ioctl
5666  *
5667  * Description:	Generic fileops ioctl indirected through the fileops pointer
5668  *		in the fileproc structure
5669  *
5670  * Parameters:	fp				fileproc structure pointer
5671  *		com				ioctl command
5672  *		data				pointer to internalized copy
5673  *						of user space ioctl command
5674  *						parameter data in kernel space
5675  *		ctx				VFS context for operation
5676  *
5677  * Returns:	0				Success
5678  *		!0				Errno from ioctl
5679  *
5680  * Locks:	The caller is assumed to have held the proc_fdlock; this
5681  *		function releases and reacquires this lock.  If the caller
5682  *		accesses data protected by this lock prior to calling this
5683  *		function, it will need to revalidate/reacquire any cached
5684  *		protected data obtained prior to the call.
5685  */
5686 int
fo_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5687 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5688 {
5689 	int error;
5690 
5691 	proc_fdunlock(vfs_context_proc(ctx));
5692 	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5693 	proc_fdlock(vfs_context_proc(ctx));
5694 	return error;
5695 }
5696 
5697 int
fo_no_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5698 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5699 {
5700 #pragma unused(fp, com, data, ctx)
5701 	return ENOTTY;
5702 }
5703 
5704 
5705 /*
5706  * fo_select
5707  *
5708  * Description:	Generic fileops select indirected through the fileops pointer
5709  *		in the fileproc structure
5710  *
5711  * Parameters:	fp				fileproc structure pointer
5712  *		which				select which
5713  *		wql				pointer to wait queue list
5714  *		ctx				VFS context for operation
5715  *
5716  * Returns:	0				Success
5717  *		!0				Errno from select
5718  */
5719 int
fo_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)5720 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5721 {
5722 	return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5723 }
5724 
5725 int
fo_no_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)5726 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5727 {
5728 #pragma unused(fp, which, wql, ctx)
5729 	return ENOTSUP;
5730 }
5731 
5732 
5733 /*
5734  * fo_close
5735  *
5736  * Description:	Generic fileops close indirected through the fileops pointer
5737  *		in the fileproc structure
5738  *
5739  * Parameters:	fp				fileproc structure pointer for
5740  *						file to close
5741  *		ctx				VFS context for operation
5742  *
5743  * Returns:	0				Success
5744  *		!0				Errno from close
5745  */
5746 int
fo_close(struct fileglob * fg,vfs_context_t ctx)5747 fo_close(struct fileglob *fg, vfs_context_t ctx)
5748 {
5749 	return (*fg->fg_ops->fo_close)(fg, ctx);
5750 }
5751 
5752 
5753 /*
5754  * fo_drain
5755  *
 * Description:	Generic fileops drain indirected through the fileops
 *		pointer in the fileproc structure
5758  *
5759  * Parameters:	fp				fileproc structure pointer
5760  *		ctx				VFS context for operation
5761  *
5762  * Returns:	0				Success
5763  *		!0				errno from drain
5764  */
5765 int
fo_drain(struct fileproc * fp,vfs_context_t ctx)5766 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5767 {
5768 	return (*fp->f_ops->fo_drain)(fp, ctx);
5769 }
5770 
5771 int
fo_no_drain(struct fileproc * fp,vfs_context_t ctx)5772 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5773 {
5774 #pragma unused(fp, ctx)
5775 	return ENOTSUP;
5776 }
5777 
5778 
5779 /*
5780  * fo_kqfilter
5781  *
5782  * Description:	Generic fileops kqueue filter indirected through the fileops
5783  *		pointer in the fileproc structure
5784  *
 * Parameters:	fp				fileproc structure pointer
 *		kn				pointer to knote to filter on
 *		kev				kevent passed through to the filter
5787  *
5788  * Returns:	(kn->kn_flags & EV_ERROR)	error in kn->kn_data
5789  *		0				Filter is not active
5790  *		!0				Filter is active
5791  */
5792 int
fo_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)5793 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5794 {
5795 	return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5796 }
5797 
5798 int
fo_no_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)5799 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5800 {
5801 #pragma unused(fp, kev)
5802 	knote_set_error(kn, ENOTSUP);
5803 	return 0;
5804 }
5805