xref: /xnu-8020.121.3/bsd/kern/kern_descrip.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
67  */
68 /*
69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70  * support for mandatory and extensible security protections.  This notice
71  * is included in support of clause 2.2 (b) of the Apple Public License,
72  * Version 2.0.
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110 
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116 
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119 
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124 
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129 
/*
 * Mach IPC interfaces used by the fileport code; not exposed through a
 * public kernel header, so they are declared manually here.
 */
#define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
    mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
void ipc_port_release_send(ipc_port_t);

void fileport_releasefg(struct fileglob *fg);

/* flags for fp_close_and_unlock */
#define FD_DUP2RESV 1

/* We don't want these exported */

__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

/* Conflict wait queue for when selects collide (opaque type) */
extern struct waitq select_conflict_queue;

/*
 * Convenience accessors: reach through a fileproc's fp_glob to the
 * shared fileglob's fields.
 */
#define f_flag fp_glob->fg_flag
#define f_type fp_glob->fg_ops->fo_type
#define f_cred fp_glob->fg_cred
#define f_ops fp_glob->fg_ops
#define f_offset fp_glob->fg_offset

/* Zones backing struct fileglob and struct fileproc allocations. */
ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);

/*
 * If you need accounting for KM_OFILETABL consider using
 * KALLOC_HEAP_DEFINE to define a view.
 */
#define KM_OFILETABL KHEAP_DEFAULT

/*
 * Descriptor management.
 */
int nfiles;                     /* actual number of open files */
/*
 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
 */
static const struct fileops uninitops;

/* Refcount group for file refcounts; lock group for fileglob locks. */
os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
static LCK_GRP_DECLARE(file_lck_grp, "file");
175 
176 #pragma mark fileglobs
177 
178 /*!
179  * @function fg_free
180  *
181  * @brief
182  * Free a file structure.
183  */
static void
fg_free(struct fileglob *fg)
{
	/* fg is coming off the global open-file count it was charged to. */
	os_atomic_dec(&nfiles, relaxed);

	if (fg->fg_vn_data) {
		fg_vn_data_free(fg->fg_vn_data);
		fg->fg_vn_data = NULL;
	}

	/* Drop the credential reference stashed at open time, if any. */
	kauth_cred_t cred = fg->fg_cred;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_unref(&cred);
		fg->fg_cred = NOCRED;
	}
	lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);

#if CONFIG_MACF && CONFIG_VNGUARD
	vng_file_label_destroy(fg);
#endif
	zfree(fg_zone, fg);
}
206 
/*
 * fg_ref
 *
 * Take an additional reference on a fileglob.  The proc_fdlock of `p`
 * must be held by the caller (asserted on DEBUG/DEVELOPMENT builds;
 * `p` is otherwise unused).
 */
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p;
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
218 
/*
 * fg_drop_live
 *
 * Release a fileglob reference that is known not to be the last one
 * (os_ref_release_live_raw panics if the count would reach zero).
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
224 
/*
 * fg_drop
 *
 * Description:	Release a reference on a fileglob; on the last reference
 *		invoke the fileops close routine and free the structure.
 *		Also releases any POSIX advisory locks held by `p` on the
 *		underlying vnode, per POSIX close semantics.
 *
 * Parameters:	p		Process dropping the reference (may be NULL
 *				when the fg was held by a message/fileport)
 *		fg		The fileglob; NULL is tolerated (no-op)
 *
 * Returns:	0 or an error from vnode_getwithref()/fo_close().
 */
int
fg_drop(proc_t p, struct fileglob *fg)
{
	struct vnode *vp;
	struct vfs_context context;
	int error = 0;

	if (fg == NULL) {
		return 0;
	}

	/* Set up context with cred stashed in fg */
	if (p == current_proc()) {
		context.vc_thread = current_thread();
	} else {
		context.vc_thread = NULL;
	}
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
	    (p->p_ladvflag & P_LADVLOCK)) {
		struct flock lf = {
			.l_whence = SEEK_SET,
			.l_type = F_UNLCK,
		};

		vp = (struct vnode *)fg_get_data(fg);
		if ((error = vnode_getwithref(vp)) == 0) {
			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
			(void)vnode_put(vp);
		}
	}

	/* Last reference: run the type-specific close, then free. */
	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
		/*
		 * Since we ensure that fg->fg_ops is always initialized,
		 * it is safe to invoke fo_close on the fg
		 */
		error = fo_close(fg, &context);

		fg_free(fg);
	}

	return error;
}
278 
/*
 * fg_set_data
 *
 * Store the type-specific payload pointer (vnode, socket, pipe, ...) in
 * fg->fg_data.  On ptrauth-capable hardware the pointer is signed with a
 * discriminator derived from the storage address and the descriptor type,
 * so a forged or relocated fg_data fails authentication on load.
 */
inline
void
fg_set_data(
	struct fileglob *fg,
	void *fg_data)
{
	uintptr_t *store = &fg->fg_data;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_sign_unauthenticated(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	*store = (uintptr_t)fg_data;
}
300 
/*
 * fg_get_data_volatile
 *
 * Load the type-specific payload pointer from fg->fg_data, authenticating
 * the ptrauth signature applied by fg_set_data() when pointer
 * authentication is available.  Returns NULL if no payload was set.
 */
inline
void *
fg_get_data_volatile(struct fileglob *fg)
{
	uintptr_t *store = &fg->fg_data;
	void *fg_data = (void *)*store;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		/* Panics on authentication failure (corrupted/forged pointer). */
		fg_data = ptrauth_auth_data(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	return fg_data;
}
321 
322 
323 bool
fg_sendable(struct fileglob * fg)324 fg_sendable(struct fileglob *fg)
325 {
326 	switch (FILEGLOB_DTYPE(fg)) {
327 	case DTYPE_VNODE:
328 	case DTYPE_SOCKET:
329 	case DTYPE_PIPE:
330 	case DTYPE_PSXSHM:
331 	case DTYPE_NETPOLICY:
332 		return (fg->fg_lflags & FG_CONFINED) == 0;
333 
334 	default:
335 		return false;
336 	}
337 }
338 
339 #pragma mark file descriptor table (static helpers)
340 
/*
 * Mark slot `fd` as reserved (in flux): no fileproc installed yet, but the
 * slot may not be handed out by fdalloc().  Caller holds the proc_fdlock.
 */
static void
procfdtbl_reservefd(struct proc * p, int fd)
{
	p->p_fd.fd_ofiles[fd] = NULL;
	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
}
347 
/*
 * Settle a previously reserved slot: optionally install `fp`, clear
 * UF_RESERVED, and wake anyone sleeping in procfdtbl_waitfd() on this
 * table.  Caller holds the proc_fdlock.
 */
void
procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
{
	if (fp != NULL) {
		p->p_fd.fd_ofiles[fd] = fp;
	}
	p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
	if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
		p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
		wakeup(&p->p_fd);
	}
}
360 
/*
 * Sleep until the reserved slot `fd` is settled; marks the slot UF_RESVWAIT
 * so procfdtbl_releasefd()/procfdtbl_clearfd() know to issue a wakeup.
 * msleep() drops and re-takes the fd_lock around the sleep.
 */
static void
procfdtbl_waitfd(struct proc * p, int fd)
{
	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
}
367 
/*
 * Fully clear slot `fd` (pointer and all flags), waking any waiter that
 * was blocked on the reservation.  Caller holds the proc_fdlock.
 */
static void
procfdtbl_clearfd(struct proc * p, int fd)
{
	int waiting;

	/* remember whether someone is sleeping before the flags are wiped */
	waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
	p->p_fd.fd_ofiles[fd] = NULL;
	p->p_fd.fd_ofileflags[fd] = 0;
	if (waiting == UF_RESVWAIT) {
		wakeup(&p->p_fd);
	}
}
380 
/*
 * fdrelse
 *
 * Description:	Inline utility function to free an fd in a filedesc
 *
 * Parameters:	p				Process whose descriptor
 *						table contains fd
 *		fd				fd to free
 *
 * Returns:	void
 *
 * Locks:	Assumes proc_fdlock for process p is held by
 *		the caller
 */
void
fdrelse(struct proc * p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	int nfd = 0;

	/* keep fd_freefile as the lowest possibly-free slot */
	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}
#if DIAGNOSTIC
	if (fd >= fdp->fd_afterlast) {
		panic("fdrelse: fd_afterlast inconsistent");
	}
#endif
	procfdtbl_clearfd(p, fd);

	/* shrink fd_afterlast past any trailing free, unreserved slots */
	nfd = fdp->fd_afterlast;
	while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
	    !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
		nfd--;
	}
	fdp->fd_afterlast = nfd;

#if CONFIG_PROC_RESOURCE_LIMITS
	fdp->fd_nfiles_open--;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
422 
423 
424 /*
425  * finishdup
426  *
427  * Description:	Common code for dup, dup2, and fcntl(F_DUPFD).
428  *
429  * Parameters:	p				Process performing the dup
430  *		old				The fd to dup
431  *		new				The fd to dup it to
432  *		fp_flags			Flags to augment the new fp
433  *		retval				Pointer to the call return area
434  *
435  * Returns:	0				Success
436  *		EBADF
437  *		ENOMEM
438  *
439  * Implicit returns:
440  *		*retval (modified)		The new descriptor
441  *
442  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
443  *		the caller
444  *
445  * Notes:	This function may drop and reacquire this lock; it is unsafe
446  *		for a caller to assume that other state protected by the lock
447  *		has not been subsequently changed out from under it.
448  */
static int
finishdup(proc_t p, struct filedesc *fdp, int old, int new,
    fileproc_flags_t fp_flags, int32_t *retval)
{
	struct fileproc *nfp;
	struct fileproc *ofp;
#if CONFIG_MACF
	int error;
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	/* source fd must be populated and settled, else give back `new` */
	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
		fdrelse(p, new);
		return EBADF;
	}

#if CONFIG_MACF
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_dup(cred, ofp->fp_glob, new);
	kauth_cred_unref(&cred);

	if (error) {
		fdrelse(p, new);
		return error;
	}
#endif

	/* take the fg reference BEFORE dropping the lock so it can't vanish */
	fg_ref(p, ofp->fp_glob);

	proc_fdunlock(p);

	/* allocation may block, so it is done with the fdlock dropped */
	nfp = fileproc_alloc_init();

	if (fp_flags) {
		nfp->fp_flags |= fp_flags;
	}
	nfp->fp_glob = ofp->fp_glob;

	proc_fdlock(p);

#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("finishdup: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("finishdup: unreserved fileflags with new %d", new);
	}
#endif

	if (new >= fdp->fd_afterlast) {
		fdp->fd_afterlast = new + 1;
	}
	/* install nfp in the reserved slot and wake any waiters */
	procfdtbl_releasefd(p, new, nfp);
	*retval = new;
	return 0;
}
509 
510 
511 #pragma mark file descriptor table (exported functions)
512 
/*
 * Reader/writer lock protecting a process' current (fd_cdir) and root
 * (fd_rdir) directory vnode pointers.  Shared for lookups, exclusive
 * when changing directories.
 */
void
proc_dirs_lock_shared(proc_t p)
{
	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_unlock_shared(proc_t p)
{
	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_lock_exclusive(proc_t p)
{
	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_unlock_exclusive(proc_t p)
{
	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
}
536 
537 /*
538  * proc_fdlock, proc_fdlock_spin
539  *
540  * Description:	Lock to control access to the per process struct fileproc
541  *		and struct filedesc
542  *
543  * Parameters:	p				Process to take the lock on
544  *
545  * Returns:	void
546  *
547  * Notes:	The lock is initialized in forkproc() and destroyed in
548  *		reap_child_process().
549  */
void
proc_fdlock(proc_t p)
{
	lck_mtx_lock(&p->p_fd.fd_lock);
}

/* Spin variant: takes fd_lock without blocking interrupts-disabled callers. */
void
proc_fdlock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_fd.fd_lock);
}

/* Assert ownership (or non-ownership) of fd_lock per `assertflags`. */
void
proc_fdlock_assert(proc_t p, int assertflags)
{
	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
}


/*
 * proc_fdunlock
 *
 * Description:	Unlock the lock previously locked by a call to proc_fdlock()
 *
 * Parameters:	p				Process to drop the lock on
 *
 * Returns:	void
 */
void
proc_fdunlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd.fd_lock);
}
583 
/*
 * fdt_available_locked
 *
 * Description:	Return true if at least `n` more file descriptors can be
 *		made available to `p`: room left to grow the table up to the
 *		RLIMIT_NOFILE limit counts first, then free unreserved slots
 *		above fd_freefile are counted.
 *
 * Locks:	Caller is expected to hold the proc_fdlock.
 */
bool
fdt_available_locked(proc_t p, int n)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **fpp;
	char *flags;
	int i;
	int lim = proc_limitgetcur_nofile(p);

	/* i = slots the table may still grow by; enough on its own? */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
		return true;
	}
	/* otherwise scan existing slots at/above fd_freefile for free ones */
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
			return true;
		}
	}
	return false;
}
605 
606 
607 struct fdt_iterator
fdt_next(proc_t p,int fd,bool only_settled)608 fdt_next(proc_t p, int fd, bool only_settled)
609 {
610 	struct fdt_iterator it;
611 	struct filedesc *fdp = &p->p_fd;
612 	struct fileproc *fp;
613 	int nfds = fdp->fd_afterlast;
614 
615 	while (++fd < nfds) {
616 		fp = fdp->fd_ofiles[fd];
617 		if (fp == NULL || fp->fp_glob == NULL) {
618 			continue;
619 		}
620 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
621 			continue;
622 		}
623 		it.fdti_fd = fd;
624 		it.fdti_fp = fp;
625 		return it;
626 	}
627 
628 	it.fdti_fd = nfds;
629 	it.fdti_fp = NULL;
630 	return it;
631 }
632 
633 struct fdt_iterator
fdt_prev(proc_t p,int fd,bool only_settled)634 fdt_prev(proc_t p, int fd, bool only_settled)
635 {
636 	struct fdt_iterator it;
637 	struct filedesc *fdp = &p->p_fd;
638 	struct fileproc *fp;
639 
640 	while (--fd >= 0) {
641 		fp = fdp->fd_ofiles[fd];
642 		if (fp == NULL || fp->fp_glob == NULL) {
643 			continue;
644 		}
645 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
646 			continue;
647 		}
648 		it.fdti_fd = fd;
649 		it.fdti_fp = fp;
650 		return it;
651 	}
652 
653 	it.fdti_fd = -1;
654 	it.fdti_fp = NULL;
655 	return it;
656 }
657 
/*
 * fdt_init
 *
 * Initialize the locks embedded in a process' file descriptor table.
 * Paired with fdt_destroy().
 */
void
fdt_init(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
}
668 
/*
 * fdt_destroy
 *
 * Tear down the locks initialized by fdt_init().  Must mirror fdt_init()
 * exactly (same locks, same groups).
 */
void
fdt_destroy(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
}
679 
/*
 * fdt_exec
 *
 * Description:	Prepare a process' descriptor table for exec: unbind the
 *		calling thread from any kqueue servicing role, drop all
 *		knotes, close every descriptor that must not survive exec
 *		(FP_CLOEXEC, or — with POSIX_SPAWN_CLOEXEC_DEFAULT — every
 *		fd not explicitly marked UF_INHERIT), and release the
 *		per-process workq kqueue.
 *
 * Parameters:	p			The exec'ing process
 *		posix_spawn_flags	posix_spawn attribute flags
 */
void
fdt_exec(proc_t p, short posix_spawn_flags)
{
	struct filedesc *fdp = &p->p_fd;
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);
	struct kqworkq *dealloc_kqwq = NULL;

	/*
	 * If the current thread is bound as a workq/workloop
	 * servicing thread, we need to unbind it first.
	 */
	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
	}

	/*
	 * Deallocate the knotes for this process
	 * and mark the tables non-existent so
	 * subsequent kqueue closes go faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	proc_fdlock(p);

	/* walk from the highest populated fd downwards */
	for (int i = fdp->fd_afterlast; i-- > 0;) {
		struct fileproc *fp = fdp->fd_ofiles[i];
		char *flagp = &fdp->fd_ofileflags[i];
		bool inherit_file = true;

		if (fp == FILEPROC_NULL) {
			continue;
		}

		/*
		 * no file descriptor should be in flux when in exec,
		 * because we stopped all other threads
		 */
		if (*flagp & ~UF_INHERIT) {
			panic("file %d/%p in flux during exec of %p", i, fp, p);
		}

		if (fp->fp_flags & FP_CLOEXEC) {
			inherit_file = false;
		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
		    !(*flagp & UF_INHERIT)) {
			/*
			 * Reverse the usual semantics of file descriptor
			 * inheritance - all of them should be closed
			 * except files marked explicitly as "inherit" and
			 * not marked close-on-exec.
			 */
			inherit_file = false;
#if CONFIG_MACF
		} else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
			inherit_file = false;
#endif
		}

		*flagp = 0; /* clear UF_INHERIT */

		if (!inherit_file) {
			/* drops the fdlock; re-take it for the next iteration */
			fp_close_and_unlock(p, i, fp, 0);
			proc_fdlock(p);
		}
	}

	/* release the per-process workq kq */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* Anything to free? */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
}
762 
763 
/*
 * fdt_fork
 *
 * Description:	Populate a child's file descriptor table from parent `p`
 *		at fork time: inherit cwd/root directory references, then
 *		duplicate every descriptor that is not confined, not
 *		FP_CLOFORK, and not in flux.
 *
 * Parameters:	newfdp		Child's (zero-initialized) filedesc
 *		p		Parent process
 *		uth_cdir	Per-thread cwd to inherit instead of the
 *				process cwd, or NULLVP
 *
 * Returns:	0		Success
 *		EPERM		Could not re-reference the parent's chroot
 *		ENOMEM		Could not allocate the child's fd arrays
 */
int
fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **ofiles;
	char *ofileflags;
	int n_files, afterlast, freefile;
	vnode_t v_dir;
#if CONFIG_PROC_RESOURCE_LIMITS
	int fd_nfiles_open = 0;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	proc_fdlock(p);

	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
	newfdp->fd_cmask = fdp->fd_cmask;
#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	/*
	 * For both fd_cdir and fd_rdir make sure we get
	 * a valid reference... if we can't, than set
	 * set the pointer(s) to NULL in the child... this
	 * will keep us from using a non-referenced vp
	 * and allows us to do the vnode_rele only on
	 * a properly referenced vp
	 */
	if ((v_dir = fdp->fd_rdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_rdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_rdir == NULL) {
			/*
			 * We couldn't get a new reference on
			 * the chroot directory being
			 * inherited... this is fatal, since
			 * otherwise it would constitute an
			 * escape from a chroot environment by
			 * the new process.
			 */
			proc_fdunlock(p);
			return EPERM;
		}
	}

	/*
	 * If we are running with per-thread current working directories,
	 * inherit the new current working directory from the current thread.
	 */
	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_cdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
			/*
			 * we couldn't get a new reference on
			 * the current working directory being
			 * inherited... we might as well drop
			 * our reference from the parent also
			 * since the vnode has gone DEAD making
			 * it useless... by dropping it we'll
			 * be that much closer to recycling it
			 */
			vnode_rele(fdp->fd_cdir);
			fdp->fd_cdir = NULL;
		}
	}

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	afterlast = fdp->fd_afterlast;
	freefile = fdp->fd_freefile;
	if (afterlast <= NDFILE) {
		n_files = NDFILE;
	} else {
		n_files = roundup(afterlast, NDEXTENT);
	}

	/* drop the lock for the (possibly blocking) table allocation */
	proc_fdunlock(p);

	ofiles = kheap_alloc(KM_OFILETABL, n_files * OFILESIZE,
	    Z_WAITOK | Z_ZERO);
	if (ofiles == NULL) {
		/* undo the directory references taken above */
		if (newfdp->fd_cdir) {
			vnode_rele(newfdp->fd_cdir);
			newfdp->fd_cdir = NULL;
		}
		if (newfdp->fd_rdir) {
			vnode_rele(newfdp->fd_rdir);
			newfdp->fd_rdir = NULL;
		}
		return ENOMEM;
	}
	/* flag bytes live immediately after the pointer array */
	ofileflags = (char *)&ofiles[n_files];

	proc_fdlock(p);

	for (int i = afterlast; i-- > 0;) {
		struct fileproc *ofp, *nfp;
		char flags;

		ofp = fdp->fd_ofiles[i];
		flags = fdp->fd_ofileflags[i];

		if (ofp == NULL ||
		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
		    (ofp->fp_flags & FP_CLOFORK) ||
		    (flags & UF_RESERVED)) {
			/* not inherited: tighten the child's table bounds */
			if (i + 1 == afterlast) {
				afterlast = i;
			}
			if (i < freefile) {
				freefile = i;
			}

			continue;
		}

		/* guarded descriptors are never inherited across fork */
		assert(ofp->fp_guard_attrs == 0);
		nfp = fileproc_alloc_init();
		nfp->fp_glob = ofp->fp_glob;
		nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
		fg_ref(p, nfp->fp_glob);

		ofiles[i] = nfp;
#if CONFIG_PROC_RESOURCE_LIMITS
		fd_nfiles_open++;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	}

	proc_fdunlock(p);

	newfdp->fd_ofiles = ofiles;
	newfdp->fd_ofileflags = ofileflags;
	newfdp->fd_nfiles = n_files;
	newfdp->fd_afterlast = afterlast;
	newfdp->fd_freefile = freefile;

#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_open = fd_nfiles_open;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	return 0;
}
919 
/*
 * fdt_invalidate
 *
 * Description:	Tear down a process' descriptor table at exit/reap time:
 *		drop knotes and workloops, close every open descriptor,
 *		then detach the table, workq kqueue, cwd/root vnodes and
 *		kq hash under their locks and free them outside the locks.
 */
void
fdt_invalidate(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, **ofiles;
	struct kqworkq *kqwq = NULL;
	vnode_t vn1 = NULL, vn2 = NULL;
	struct kqwllist *kqhash = NULL;
	u_long kqhashmask = 0;
	int n_files = 0;

	/*
	 * deallocate all the knotes up front and claim empty
	 * tables to make any subsequent kqueue closes faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/*
	 * dealloc all workloops that have outstanding retains
	 * when created with scheduling parameters.
	 */
	kqworkloops_dealloc(p);

	proc_fdlock(p);

	/* close file descriptors */
	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
		for (int i = fdp->fd_afterlast; i-- > 0;) {
			if ((fp = fdp->fd_ofiles[i]) != NULL) {
				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
					panic("fdfree: found fp with UF_RESERVED");
				}
				/* drops the fdlock; re-take for the next fd */
				fp_close_and_unlock(p, i, fp, 0);
				proc_fdlock(p);
			}
		}
	}

	/* detach everything under the lock; free after it is dropped */
	n_files = fdp->fd_nfiles;
	ofiles = fdp->fd_ofiles;
	kqwq = fdp->fd_wqkqueue;
	vn1 = fdp->fd_cdir;
	vn2 = fdp->fd_rdir;

	fdp->fd_ofileflags = NULL;
	fdp->fd_ofiles = NULL;
	fdp->fd_nfiles = 0;
	fdp->fd_wqkqueue = NULL;
	fdp->fd_cdir = NULL;
	fdp->fd_rdir = NULL;

	proc_fdunlock(p);

	lck_mtx_lock(&fdp->fd_knhashlock);

	kqhash = fdp->fd_kqhash;
	kqhashmask = fdp->fd_kqhashmask;

	fdp->fd_kqhash = 0;
	fdp->fd_kqhashmask = 0;

	lck_mtx_unlock(&fdp->fd_knhashlock);

	/* now free everything detached above, with no locks held */
	kheap_free(KM_OFILETABL, ofiles, n_files * OFILESIZE);

	if (kqwq) {
		kqworkq_dealloc(kqwq);
	}
	if (vn1) {
		vnode_rele(vn1);
	}
	if (vn2) {
		vnode_rele(vn2);
	}
	if (kqhash) {
		for (uint32_t i = 0; i <= kqhashmask; i++) {
			assert(LIST_EMPTY(&kqhash[i]));
		}
		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
	}
}
1003 
1004 
/*
 * fileproc_alloc_init
 *
 * Allocate a zeroed fileproc from its zone (cannot fail: Z_NOFAIL) and
 * initialize its I/O refcount to one.  Paired with fileproc_free().
 */
struct fileproc *
fileproc_alloc_init(void)
{
	struct fileproc *fp;

	fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&fp->fp_iocount, &f_refgrp);
	return fp;
}
1014 
1015 
/*
 * fileproc_free
 *
 * Release the final I/O reference on a fileproc and return it to its
 * zone.  Panics on DEVELOPMENT/DEBUG builds if outstanding references
 * remain; any guard state is torn down before the free.
 */
void
fileproc_free(struct fileproc *fp)
{
	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
#if DEVELOPMENT || DEBUG
	if (0 != refc) {
		panic("%s: pid %d refc: %u != 0",
		    __func__, proc_pid(current_proc()), refc);
	}
#endif
	if (fp->fp_guard_attrs) {
		guarded_fileproc_unguard(fp);
	}
	assert(fp->fp_wset == NULL);
	zfree_id(ZONE_ID_FILEPROC, fp);
}
1032 
1033 
1034 /*
1035  * Statistics counter for the number of times a process calling fdalloc()
1036  * has resulted in an expansion of the per process open file table.
1037  *
1038  * XXX This would likely be of more use if it were per process
1039  */
1040 int fdexpand;
1041 
#if CONFIG_PROC_RESOURCE_LIMITS
/*
 * fd_check_limit_exceeded
 *
 * Description:	Send a one-shot resource notification (and set an AST on
 *		the current thread) the first time the number of open files
 *		crosses the process' soft or hard fd limit.
 *
 * Parameters:	fdp		File descriptor table to check
 *
 * Locks:	Should be called only with the proc_fdlock held.
 */
void
fd_check_limit_exceeded(struct filedesc *fdp)
{
#if DIAGNOSTIC
	/*
	 * The owning proc is not passed in, so assert directly on the
	 * fd_lock embedded in the table (fdp == &p->p_fd, so this is the
	 * same lock proc_fdlock_assert() checks).  The previous code
	 * referenced an undeclared `p` here and failed to compile with
	 * DIAGNOSTIC enabled.
	 */
	lck_mtx_assert(&fdp->fd_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
	    (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
		fd_above_soft_limit_send_notification(fdp);
		act_set_astproc_resource(current_thread());
	} else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
	    (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
		fd_above_hard_limit_send_notification(fdp);
		act_set_astproc_resource(current_thread());
	}
}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
1063 
1064 /*
1065  * fdalloc
1066  *
1067  * Description:	Allocate a file descriptor for the process.
1068  *
1069  * Parameters:	p				Process to allocate the fd in
1070  *		want				The fd we would prefer to get
1071  *		result				Pointer to fd we got
1072  *
1073  * Returns:	0				Success
1074  *		EMFILE
1075  *		ENOMEM
1076  *
1077  * Implicit returns:
1078  *		*result (modified)		The fd which was allocated
1079  */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = &p->p_fd;
	int i;
	int last, numfiles, oldnfiles;
	struct fileproc **newofiles, **ofiles;
	char *newofileflags;
	int lim = proc_limitgetcur_nofile(p);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	for (;;) {
		/* Never scan past the table end or the RLIMIT_NOFILE cap. */
		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
		if ((i = want) < fdp->fd_freefile) {
			i = fdp->fd_freefile;
		}
		for (; i < last; i++) {
			/* A usable slot is empty and not reserved by an in-flight open/close. */
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				/* Mark the slot UF_RESERVED so concurrent scans skip it. */
				procfdtbl_reservefd(p, i);
				if (i >= fdp->fd_afterlast) {
					fdp->fd_afterlast = i + 1;
				}
				if (want <= fdp->fd_freefile) {
					fdp->fd_freefile = i;
				}
				*result = i;
#if CONFIG_PROC_RESOURCE_LIMITS
				fdp->fd_nfiles_open++;
				fd_check_limit_exceeded(fdp);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
				return 0;
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if ((rlim_t)fdp->fd_nfiles >= lim) {
			return EMFILE;
		}
		/* Grow to NDEXTENT first, then double on each later expansion. */
		if (fdp->fd_nfiles < NDEXTENT) {
			numfiles = NDEXTENT;
		} else {
			numfiles = 2 * fdp->fd_nfiles;
		}
		/* Enforce lim */
		if ((rlim_t)numfiles > lim) {
			numfiles = (int)lim;
		}
		/*
		 * Drop the fdlock around the (possibly blocking) allocation.
		 * Another thread may grow the table meanwhile, so recheck
		 * after re-acquiring the lock.
		 */
		proc_fdunlock(p);
		newofiles = kheap_alloc(KM_OFILETABL, numfiles * OFILESIZE,
		    Z_WAITOK);
		proc_fdlock(p);
		if (newofiles == NULL) {
			return ENOMEM;
		}
		if (fdp->fd_nfiles >= numfiles) {
			/* Lost the race: the table already grew; retry the scan. */
			kheap_free(KM_OFILETABL, newofiles, numfiles * OFILESIZE);
			continue;
		}
		/* The flags array lives immediately after the pointer array. */
		newofileflags = (char *) &newofiles[numfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		(void) memcpy(newofiles, fdp->fd_ofiles,
		    oldnfiles * sizeof(*fdp->fd_ofiles));
		(void) memset(&newofiles[oldnfiles], 0,
		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

		(void) memcpy(newofileflags, fdp->fd_ofileflags,
		    oldnfiles * sizeof(*fdp->fd_ofileflags));
		(void) memset(&newofileflags[oldnfiles], 0,
		    (numfiles - oldnfiles) *
		    sizeof(*fdp->fd_ofileflags));
		/* Publish the new arrays under the lock, then free the old ones. */
		ofiles = fdp->fd_ofiles;
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		kheap_free(KM_OFILETABL, ofiles, oldnfiles * OFILESIZE);
		fdexpand++;
	}
}
1172 
1173 
1174 #pragma mark fileprocs
1175 
1176 void
fileproc_modify_vflags(struct fileproc * fp,fileproc_vflags_t vflags,boolean_t clearflags)1177 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1178 {
1179 	if (clearflags) {
1180 		os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1181 	} else {
1182 		os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1183 	}
1184 }
1185 
1186 fileproc_vflags_t
fileproc_get_vflags(struct fileproc * fp)1187 fileproc_get_vflags(struct fileproc *fp)
1188 {
1189 	return os_atomic_load(&fp->fp_vflags, relaxed);
1190 }
1191 
1192 /*
1193  * falloc_withinit
1194  *
1195  * Create a new open file structure and allocate
1196  * a file descriptor for the process that refers to it.
1197  *
1198  * Returns:	0			Success
1199  *
1200  * Description:	Allocate an entry in the per process open file table and
1201  *		return the corresponding fileproc and fd.
1202  *
1203  * Parameters:	p				The process in whose open file
1204  *						table the fd is to be allocated
1205  *		resultfp			Pointer to fileproc pointer
1206  *						return area
1207  *		resultfd			Pointer to fd return area
1208  *		ctx				VFS context
1209  *		fp_zalloc			fileproc allocator to use
1210  *		crarg				allocator args
1211  *
1212  * Returns:	0				Success
1213  *		ENFILE				Too many open files in system
1214  *		fdalloc:EMFILE			Too many open files in process
 *		fdalloc:ENOMEM			KM_OFILETABL allocation exhausted
1216  *		ENOMEM				fp_zone or fg_zone zone
1217  *						exhausted
1218  *
1219  * Implicit returns:
 *		*resultfp (modified)		Returned fileproc pointer
1221  *		*resultfd (modified)		Returned fd
1222  *
1223  * Notes:	This function takes separate process and context arguments
1224  *		solely to support kern_exec.c; otherwise, it would take
1225  *		neither, and use the vfs_context_current() routine internally.
1226  */
int
falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
    vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
{
	struct fileproc *fp;
	struct fileglob *fg;
	int error, nfd;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

	/* Make sure we don't go beyond the system-wide limit */
	if (nfiles >= maxfiles) {
		tablefull("file");
		return ENFILE;
	}

	proc_fdlock(p);

	/* fdalloc will make sure the process stays below per-process limit */
	if ((error = fdalloc(p, 0, &nfd))) {
		proc_fdunlock(p);
		return error;
	}

#if CONFIG_MACF
	/* Ask the MAC framework whether this process may create a file. */
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_create(cred);
	kauth_cred_unref(&cred);
	if (error) {
		proc_fdunlock(p);
		return error;
	}
#endif

	/*
	 * Drop the fdlock to allocate and initialize the fileproc and
	 * fileglob; the slot picked by fdalloc() above was reserved by
	 * procfdtbl_reservefd(), so it stays out of reach meanwhile.
	 */
	proc_fdunlock(p);

	fp = fileproc_alloc_init();
	/* Let the caller initialize type-specific fileproc state first. */
	if (fp_init) {
		fp_init(fp, initarg);
	}

	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
	lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);

	/* Take an I/O reference for the caller and start the fg refcount. */
	os_ref_retain_locked(&fp->fp_iocount);
	os_ref_init_raw(&fg->fg_count, &f_refgrp);
	/* Ops stay uninitialized until the caller wires up the real type. */
	fg->fg_ops = &uninitops;
	fp->fp_glob = fg;

	/* The new file holds its own reference on the supplied credential. */
	kauth_cred_ref(ctx->vc_ucred);

	fp->f_cred = ctx->vc_ucred;

	os_atomic_inc(&nfiles, relaxed);

	proc_fdlock(p);

	/* Publish the fileproc in the previously reserved slot. */
	p->p_fd.fd_ofiles[nfd] = fp;

	proc_fdunlock(p);

	if (resultfp) {
		*resultfp = fp;
	}
	if (resultfd) {
		*resultfd = nfd;
	}

	return 0;
}
1304 
1305 int
falloc(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx)1306 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
1307 {
1308 	return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
1309 }
1310 
1311 
1312 /*
1313  * fp_free
1314  *
1315  * Description:	Release the fd and free the fileproc associated with the fd
1316  *		in the per process open file table of the specified process;
1317  *		these values must correspond.
1318  *
1319  * Parameters:	p				Process containing fd
1320  *		fd				fd to be released
1321  *		fp				fileproc to be freed
1322  */
void
fp_free(proc_t p, int fd, struct fileproc * fp)
{
	/* Give the descriptor slot back to the process's fd table first. */
	proc_fdlock_spin(p);
	fdrelse(p, fd);
	proc_fdunlock(p);

	/*
	 * Then tear down the fileproc: drop the fileglob, release the
	 * I/O reference, and free the fp.  NOTE(review): assumes the
	 * caller is the sole holder of this fp (fd and fp must
	 * correspond, per the contract above) — confirm at call sites.
	 */
	fg_free(fp->fp_glob);
	os_ref_release_live(&fp->fp_iocount);
	fileproc_free(fp);
}
1334 
1335 
1336 struct fileproc *
fp_get_noref_locked(proc_t p,int fd)1337 fp_get_noref_locked(proc_t p, int fd)
1338 {
1339 	struct filedesc *fdp = &p->p_fd;
1340 	struct fileproc *fp;
1341 
1342 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1343 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1344 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1345 		return NULL;
1346 	}
1347 
1348 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1349 	return fp;
1350 }
1351 
/*
 * Like fp_get_noref_locked(), but for callers that claim to already
 * hold an I/O reference on the fileproc (e.g. to drop it via
 * file_drop()).  Panics instead of returning NULL when the table state
 * contradicts that claim.  Caller must hold the proc_fdlock.
 */
struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp = NULL;

	/*
	 * A legitimate caller implies: fd in range, slot occupied,
	 * fp_iocount above the baseline (> 1), and the slot either not
	 * reserved or reserved only because a close is in progress
	 * (UF_CLOSING set alongside UF_RESERVED).
	 */
	if (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    os_ref_get_count(&fp->fp_iocount) <= 1 ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
		    __func__, fd, fp);
	}

	/* Validate the pointer really comes from the fileproc zone. */
	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
	return fp;
}
1370 
1371 
1372 /*
1373  * fp_lookup
1374  *
1375  * Description:	Get fileproc pointer for a given fd from the per process
1376  *		open file table of the specified process and if successful,
1377  *		increment the fp_iocount
1378  *
1379  * Parameters:	p				Process in which fd lives
1380  *		fd				fd to get information for
1381  *		resultfp			Pointer to result fileproc
1382  *						pointer area, or 0 if none
1383  *		locked				!0 if the caller holds the
1384  *						proc_fdlock, 0 otherwise
1385  *
1386  * Returns:	0			Success
1387  *		EBADF			Bad file descriptor
1388  *
1389  * Implicit returns:
1390  *		*resultfp (modified)		Fileproc pointer
1391  *
1392  * Locks:	If the argument 'locked' is non-zero, then the caller is
1393  *		expected to have taken and held the proc_fdlock; if it is
1394  *		zero, than this routine internally takes and drops this lock.
1395  */
1396 int
fp_lookup(proc_t p,int fd,struct fileproc ** resultfp,int locked)1397 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1398 {
1399 	struct filedesc *fdp = &p->p_fd;
1400 	struct fileproc *fp;
1401 
1402 	if (!locked) {
1403 		proc_fdlock_spin(p);
1404 	}
1405 	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1406 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1407 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1408 		if (!locked) {
1409 			proc_fdunlock(p);
1410 		}
1411 		return EBADF;
1412 	}
1413 
1414 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1415 	os_ref_retain_locked(&fp->fp_iocount);
1416 
1417 	if (resultfp) {
1418 		*resultfp = fp;
1419 	}
1420 	if (!locked) {
1421 		proc_fdunlock(p);
1422 	}
1423 
1424 	return 0;
1425 }
1426 
1427 
1428 int
fp_get_ftype(proc_t p,int fd,file_type_t ftype,int err,struct fileproc ** fpp)1429 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1430 {
1431 	struct filedesc *fdp = &p->p_fd;
1432 	struct fileproc *fp;
1433 
1434 	proc_fdlock_spin(p);
1435 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1436 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1437 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1438 		proc_fdunlock(p);
1439 		return EBADF;
1440 	}
1441 
1442 	if (fp->f_type != ftype) {
1443 		proc_fdunlock(p);
1444 		return err;
1445 	}
1446 
1447 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1448 	os_ref_retain_locked(&fp->fp_iocount);
1449 	proc_fdunlock(p);
1450 
1451 	*fpp = fp;
1452 	return 0;
1453 }
1454 
1455 
1456 /*
1457  * fp_drop
1458  *
1459  * Description:	Drop the I/O reference previously taken by calling fp_lookup
1460  *		et. al.
1461  *
1462  * Parameters:	p				Process in which the fd lives
1463  *		fd				fd associated with the fileproc
1464  *		fp				fileproc on which to set the
1465  *						flag and drop the reference
1466  *		locked				flag to internally take and
1467  *						drop proc_fdlock if it is not
1468  *						already held by the caller
1469  *
1470  * Returns:	0				Success
1471  *		EBADF				Bad file descriptor
1472  *
1473  * Locks:	This function internally takes and drops the proc_fdlock for
1474  *		the supplied process if 'locked' is non-zero, and assumes that
1475  *		the caller already holds this lock if 'locked' is non-zero.
1476  *
1477  * Notes:	The fileproc must correspond to the fd in the supplied proc
1478  */
int
fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
{
	struct filedesc *fdp = &p->p_fd;
	int     needwakeup = 0;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * If the caller did not remember the fileproc (fp == FILEPROC_NULL),
	 * look it up from fd.  A reserved slot is accepted only when the
	 * reservation is due to an in-progress close (UF_CLOSING), so
	 * closing paths can still drop their references.
	 */
	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/*
	 * Release our I/O reference.  If only the baseline reference
	 * remains (count dropped to 1), clear any select conflict and
	 * wake a thread waiting in fileproc_drain(), which blocks until
	 * the count reaches 1.
	 */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	if (!locked) {
		proc_fdunlock(p);
	}
	/* Issue the wakeup after dropping the lock. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}

	return 0;
}
1517 
1518 
1519 /*
1520  * fileproc_drain
1521  *
1522  * Description:	Drain out pending I/O operations
1523  *
1524  * Parameters:	p				Process closing this file
1525  *		fp				fileproc struct for the open
1526  *						instance on the file
1527  *
1528  * Returns:	void
1529  *
1530  * Locks:	Assumes the caller holds the proc_fdlock
1531  *
1532  * Notes:	For character devices, this occurs on the last close of the
1533  *		device; for all other file descriptors, this occurs on each
1534  *		close to prevent fd's from being closed out from under
1535  *		operations currently in progress and blocked
1536  *
1537  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
1538  *		regarding their use and interaction with this function.
1539  */
static void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct filedesc *fdp = &p->p_fd;
	struct vfs_context context;
	thread_t thread;
	bool is_current_proc;

	is_current_proc = (p == current_proc());

	/*
	 * Build a vfs_context for fo_drain().  When draining on behalf of
	 * another process, borrow (and reference) one of its threads.
	 */
	if (!is_current_proc) {
		proc_lock(p);
		thread = proc_thread(p); /* XXX */
		thread_reference(thread);
		proc_unlock(p);
	} else {
		thread = current_thread();
	}

	context.vc_thread = thread;
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* Set the vflag for drain */
	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);

	/*
	 * Block until only the baseline reference remains (count == 1),
	 * i.e. every outstanding I/O reference has been dropped via
	 * fp_drop()/file_drop().
	 */
	while (os_ref_get_count(&fp->fp_iocount) > 1) {
		lck_mtx_convert_spin(&fdp->fd_lock);

		fo_drain(fp, &context);
		/* Kick any selecting threads blocked on this fp out of their wait. */
		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
			struct select_set *selset;

			if (fp->fp_guard_attrs) {
				selset = fp->fp_guard->fpg_wset;
			} else {
				selset = fp->fp_wset;
			}
			if (waitq_wakeup64_all(selset, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
				    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
			}
		}
		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad select_conflict_queue");
			}
		}
		/* Sleep until a reference holder wakes us, then re-check the count. */
		fdp->fd_fpdrainwait = 1;
		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
	}
#if DIAGNOSTIC
	if ((fp->fp_flags & FP_INSELECT) != 0) {
		panic("FP_INSELECT set on drained fp");
	}
#endif
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		fp->fp_flags &= ~FP_SELCONFLICT;
	}

	/* Drop the thread reference taken for the foreign-proc case. */
	if (!is_current_proc) {
		thread_deallocate(thread);
	}
}
1605 
1606 
/*
 * Close fd/fp in process p and drop the proc_fdlock on the way out.
 * Returns the result of fg_drop() on the underlying fileglob.
 * Caller must hold the proc_fdlock (it is always released here).
 */
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileglob *fg = fp->fp_glob;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/*
	 * Blocking work (MAC close notification, kauth listeners, AIO
	 * cancellation) is done with the fdlock dropped; the flags set
	 * above keep the slot unreachable in the meantime.
	 */
	if ((fp->fp_flags & FP_AIOISSUED) ||
#if CONFIG_MACF
	    (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
#else
	    kauth_authorize_fileop_has_listeners()
#endif
	    ) {
		proc_fdunlock(p);

		if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
#if CONFIG_MACF
			cred = kauth_cred_proc_ref(p);
			mac_file_notify_close(cred, fp->fp_glob);
			kauth_cred_unref(&cred);
#endif

			if (kauth_authorize_fileop_has_listeners() &&
			    vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
				u_int   fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);

				vnode_put((vnode_t)fg_get_data(fg));
			}
		}

		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes attached to this fd. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	/* Wait for all outstanding I/O references on the fp to drain. */
	fileproc_drain(p, fp);

	/*
	 * FD_DUP2RESV: leave the slot reserved (UF_RESERVED stays set) for
	 * the dup2() in progress; otherwise return the fd to the table.
	 */
	if (flags & FD_DUP2RESV) {
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
	}

	fileproc_free(fp);

	/* Drop the table's reference on the fileglob; may be the last one. */
	return fg_drop(p, fg);
}
1714 
1715 
1716 /*
1717  * dupfdopen
1718  *
1719  * Description:	Duplicate the specified descriptor to a free descriptor;
1720  *		this is the second half of fdopen(), above.
1721  *
1722  * Parameters:	p				current process pointer
1723  *		indx				fd to dup to
1724  *		dfd				fd to dup from
1725  *		mode				mode to set on new fd
1726  *		error				command code
1727  *
1728  * Returns:	0				Success
1729  *		EBADF				Source fd is bad
1730  *		EACCES				Requested mode not allowed
1731  *		!0				'error', if not ENODEV or
1732  *						ENXIO
1733  *
1734  * Notes:	XXX This is not thread safe; see fdopen() above
1735  */
int
dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *wfp;
	struct fileproc *fp;
#if CONFIG_MACF
	int myerror;
#endif

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, reject.  Note, check for new == old is necessary as
	 * falloc could allocate an already closed to-be-dup'd descriptor
	 * as the new descriptor.
	 */
	proc_fdlock(p);

	fp = fdp->fd_ofiles[indx];
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
		proc_fdunlock(p);
		return EBADF;
	}
#if CONFIG_MACF
	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
	if (myerror) {
		proc_fdunlock(p);
		return myerror;
	}
#endif
	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx).  (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 *
	 * NOTE(review): the ENXIO "steal" case described above has no
	 * corresponding case label below; ENXIO falls through to the
	 * default and is simply returned.
	 */
	switch (error) {
	case ENODEV:
		/* Guarded descriptors refuse implicit duplication. */
		if (fp_isguarded(wfp, GUARD_DUP)) {
			proc_fdunlock(p);
			return EPERM;
		}

		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			proc_fdunlock(p);
			return EACCES;
		}
		if (indx >= fdp->fd_afterlast) {
			fdp->fd_afterlast = indx + 1;
		}

		/* Repoint indx's fileproc at dfd's fileglob. */
		if (fp->fp_glob) {
			fg_free(fp->fp_glob);
		}
		fg_ref(p, wfp->fp_glob);
		fp->fp_glob = wfp->fp_glob;
		/*
		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
		 * unlike dup(), dup2() or fcntl(F_DUPFD).
		 *
		 * open1() already handled O_CLO{EXEC,FORK}
		 */
		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));

		/* Un-reserve the slot and drop an I/O reference on fp. */
		procfdtbl_releasefd(p, indx, NULL);
		fp_drop(p, indx, fp, 1);
		proc_fdunlock(p);
		return 0;

	default:
		proc_fdunlock(p);
		return error;
	}
	/* NOTREACHED */
}
1824 
1825 
1826 #pragma mark KPIS (sys/file.h)
1827 
1828 /*
1829  * fg_get_vnode
1830  *
1831  * Description:	Return vnode associated with the file structure, if
1832  *		any.  The lifetime of the returned vnode is bound to
1833  *		the lifetime of the file structure.
1834  *
1835  * Parameters:	fg				Pointer to fileglob to
1836  *						inspect
1837  *
1838  * Returns:	vnode_t
1839  */
1840 vnode_t
fg_get_vnode(struct fileglob * fg)1841 fg_get_vnode(struct fileglob *fg)
1842 {
1843 	if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1844 		return (vnode_t)fg_get_data(fg);
1845 	} else {
1846 		return NULL;
1847 	}
1848 }
1849 
1850 
1851 /*
1852  * fp_getfvp
1853  *
1854  * Description:	Get fileproc and vnode pointer for a given fd from the per
1855  *		process open file table of the specified process, and if
1856  *		successful, increment the fp_iocount
1857  *
1858  * Parameters:	p				Process in which fd lives
1859  *		fd				fd to get information for
1860  *		resultfp			Pointer to result fileproc
1861  *						pointer area, or 0 if none
1862  *		resultvp			Pointer to result vnode pointer
1863  *						area, or 0 if none
1864  *
1865  * Returns:	0				Success
1866  *		EBADF				Bad file descriptor
1867  *		ENOTSUP				fd does not refer to a vnode
1868  *
1869  * Implicit returns:
1870  *		*resultfp (modified)		Fileproc pointer
1871  *		*resultvp (modified)		vnode pointer
1872  *
1873  * Notes:	The resultfp and resultvp fields are optional, and may be
1874  *		independently specified as NULL to skip returning information
1875  *
1876  * Locks:	Internally takes and releases proc_fdlock
1877  */
1878 int
fp_getfvp(proc_t p,int fd,struct fileproc ** resultfp,struct vnode ** resultvp)1879 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1880 {
1881 	struct fileproc *fp;
1882 	int error;
1883 
1884 	error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1885 	if (error == 0) {
1886 		if (resultfp) {
1887 			*resultfp = fp;
1888 		}
1889 		if (resultvp) {
1890 			*resultvp = (struct vnode *)fp_get_data(fp);
1891 		}
1892 	}
1893 
1894 	return error;
1895 }
1896 
1897 
1898 /*
1899  * fp_get_pipe_id
1900  *
1901  * Description:	Get pipe id for a given fd from the per process open file table
1902  *		of the specified process.
1903  *
1904  * Parameters:	p				Process in which fd lives
1905  *		fd				fd to get information for
1906  *		result_pipe_id			Pointer to result pipe id
1907  *
1908  * Returns:	0				Success
 *		EINVAL				NULL pointer arguments passed
1910  *		fp_lookup:EBADF			Bad file descriptor
1911  *		ENOTSUP				fd does not refer to a pipe
1912  *
1913  * Implicit returns:
1914  *		*result_pipe_id (modified)	pipe id
1915  *
1916  * Locks:	Internally takes and releases proc_fdlock
1917  */
int
fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
{
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	int error = 0;

	if (p == NULL || result_pipe_id == NULL) {
		return EINVAL;
	}

	proc_fdlock(p);
	/* Take an I/O reference (locked variant) so fg stays valid below. */
	if ((error = fp_lookup(p, fd, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	fg = fp->fp_glob;

	if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
		*result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
	} else {
		error = ENOTSUP;
	}

	/* Drop the I/O reference taken by fp_lookup() above. */
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
1946 
1947 
1948 /*
1949  * file_vnode
1950  *
1951  * Description:	Given an fd, look it up in the current process's per process
1952  *		open file table, and return its internal vnode pointer.
1953  *
1954  * Parameters:	fd				fd to obtain vnode from
1955  *		vpp				pointer to vnode return area
1956  *
1957  * Returns:	0				Success
1958  *		EINVAL				The fd does not refer to a
1959  *						vnode fileproc entry
1960  *	fp_lookup:EBADF				Bad file descriptor
1961  *
1962  * Implicit returns:
1963  *		*vpp (modified)			Returned vnode pointer
1964  *
1965  * Locks:	This function internally takes and drops the proc_fdlock for
1966  *		the current process
1967  *
1968  * Notes:	If successful, this function increments the fp_iocount on the
1969  *		fd's corresponding fileproc.
1970  *
1971  *		The fileproc referenced is not returned; because of this, care
1972  *		must be taken to not drop the last reference (e.g. by closing
1973  *		the file).  This is inherently unsafe, since the reference may
1974  *		not be recoverable from the vnode, if there is a subsequent
1975  *		close that destroys the associate fileproc.  The caller should
1976  *		therefore retain their own reference on the fileproc so that
1977  *		the fp_iocount can be dropped subsequently.  Failure to do this
1978  *		can result in the returned pointer immediately becoming invalid
1979  *		following the call.
1980  *
1981  *		Use of this function is discouraged.
1982  */
1983 int
file_vnode(int fd,struct vnode ** vpp)1984 file_vnode(int fd, struct vnode **vpp)
1985 {
1986 	return file_vnode_withvid(fd, vpp, NULL);
1987 }
1988 
1989 
1990 /*
1991  * file_vnode_withvid
1992  *
1993  * Description:	Given an fd, look it up in the current process's per process
1994  *		open file table, and return its internal vnode pointer.
1995  *
1996  * Parameters:	fd				fd to obtain vnode from
1997  *		vpp				pointer to vnode return area
1998  *		vidp				pointer to vid of the returned vnode
1999  *
2000  * Returns:	0				Success
2001  *		EINVAL				The fd does not refer to a
2002  *						vnode fileproc entry
2003  *	fp_lookup:EBADF				Bad file descriptor
2004  *
2005  * Implicit returns:
2006  *		*vpp (modified)			Returned vnode pointer
2007  *
2008  * Locks:	This function internally takes and drops the proc_fdlock for
2009  *		the current process
2010  *
2011  * Notes:	If successful, this function increments the fp_iocount on the
2012  *		fd's corresponding fileproc.
2013  *
2014  *		The fileproc referenced is not returned; because of this, care
2015  *		must be taken to not drop the last reference (e.g. by closing
2016  *		the file).  This is inherently unsafe, since the reference may
2017  *		not be recoverable from the vnode, if there is a subsequent
2018  *		close that destroys the associate fileproc.  The caller should
2019  *		therefore retain their own reference on the fileproc so that
2020  *		the fp_iocount can be dropped subsequently.  Failure to do this
2021  *		can result in the returned pointer immediately becoming invalid
2022  *		following the call.
2023  *
2024  *		Use of this function is discouraged.
2025  */
2026 int
file_vnode_withvid(int fd,struct vnode ** vpp,uint32_t * vidp)2027 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2028 {
2029 	struct fileproc *fp;
2030 	int error;
2031 
2032 	error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2033 	if (error == 0) {
2034 		if (vpp) {
2035 			*vpp = (struct vnode *)fp_get_data(fp);
2036 		}
2037 		if (vidp) {
2038 			*vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2039 		}
2040 	}
2041 	return error;
2042 }
2043 
2044 /*
2045  * file_socket
2046  *
2047  * Description:	Given an fd, look it up in the current process's per process
2048  *		open file table, and return its internal socket pointer.
2049  *
2050  * Parameters:	fd				fd to obtain vnode from
2051  *		sp				pointer to socket return area
2052  *
2053  * Returns:	0				Success
2054  *		ENOTSOCK			Not a socket
2055  *		fp_lookup:EBADF			Bad file descriptor
2056  *
2057  * Implicit returns:
2058  *		*sp (modified)			Returned socket pointer
2059  *
2060  * Locks:	This function internally takes and drops the proc_fdlock for
2061  *		the current process
2062  *
2063  * Notes:	If successful, this function increments the fp_iocount on the
2064  *		fd's corresponding fileproc.
2065  *
2066  *		The fileproc referenced is not returned; because of this, care
2067  *		must be taken to not drop the last reference (e.g. by closing
2068  *		the file).  This is inherently unsafe, since the reference may
2069  *		not be recoverable from the socket, if there is a subsequent
2070  *		close that destroys the associate fileproc.  The caller should
2071  *		therefore retain their own reference on the fileproc so that
2072  *		the fp_iocount can be dropped subsequently.  Failure to do this
2073  *		can result in the returned pointer immediately becoming invalid
2074  *		following the call.
2075  *
2076  *		Use of this function is discouraged.
2077  */
2078 int
file_socket(int fd,struct socket ** sp)2079 file_socket(int fd, struct socket **sp)
2080 {
2081 	struct fileproc *fp;
2082 	int error;
2083 
2084 	error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2085 	if (error == 0) {
2086 		if (sp) {
2087 			*sp = (struct socket *)fp_get_data(fp);
2088 		}
2089 	}
2090 	return error;
2091 }
2092 
2093 
2094 /*
2095  * file_flags
2096  *
2097  * Description:	Given an fd, look it up in the current process's per process
2098  *		open file table, and return its fileproc's flags field.
2099  *
2100  * Parameters:	fd				fd whose flags are to be
2101  *						retrieved
2102  *		flags				pointer to flags data area
2103  *
2104  * Returns:	0				Success
2105  *		ENOTSOCK			Not a socket
2106  *		fp_lookup:EBADF			Bad file descriptor
2107  *
2108  * Implicit returns:
2109  *		*flags (modified)		Returned flags field
2110  *
2111  * Locks:	This function internally takes and drops the proc_fdlock for
2112  *		the current process
2113  */
2114 int
file_flags(int fd,int * flags)2115 file_flags(int fd, int *flags)
2116 {
2117 	proc_t p = current_proc();
2118 	struct fileproc *fp;
2119 	int error = EBADF;
2120 
2121 	proc_fdlock_spin(p);
2122 	fp = fp_get_noref_locked(p, fd);
2123 	if (fp) {
2124 		*flags = (int)fp->f_flag;
2125 		error = 0;
2126 	}
2127 	proc_fdunlock(p);
2128 
2129 	return error;
2130 }
2131 
2132 
2133 /*
2134  * file_drop
2135  *
2136  * Description:	Drop an iocount reference on an fd, and wake up any waiters
2137  *		for draining (i.e. blocked in fileproc_drain() called during
2138  *		the last attempt to close a file).
2139  *
2140  * Parameters:	fd				fd on which an ioreference is
2141  *						to be dropped
2142  *
2143  * Returns:	0				Success
2144  *
2145  * Description:	Given an fd, look it up in the current process's per process
 *		open file table, and drop its fileproc's fp_iocount by one
2147  *
2148  * Notes:	This is intended as a corresponding operation to the functions
2149  *		file_vnode() and file_socket() operations.
2150  *
2151  *		If the caller can't possibly hold an I/O reference,
2152  *		this function will panic the kernel rather than allowing
2153  *		for memory corruption. Callers should always call this
2154  *		because they acquired an I/O reference on this file before.
2155  *
2156  *		Use of this function is discouraged.
2157  */
int
file_drop(int fd)
{
	struct fileproc *fp;
	proc_t p = current_proc();
	struct filedesc *fdp = &p->p_fd;
	int     needwakeup = 0;

	proc_fdlock_spin(p);
	/*
	 * Per the header comment, this lookup panics rather than returning
	 * NULL if the caller does not actually hold an iocount.
	 */
	fp = fp_get_noref_locked_with_iocount(p, fd);

	/* Dropping the last iocount: clear select-conflict state and note
	 * whether a drainer (fileproc_drain()) is blocked waiting on us. */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	proc_fdunlock(p);

	/* Wake any drain waiters only after the fdlock is released. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}
	return 0;
}
2186 
2187 
/*
 * fd_rdwr
 *
 * Description:	Kernel-internal read or write on an fd: transfers "len"
 *		bytes between "base" and the file at "offset".
 *
 * Parameters:	fd		file descriptor to operate on
 *		rw		UIO_READ or UIO_WRITE
 *		base		source/destination buffer address
 *		len		number of bytes to transfer
 *		segflg		whether base is a user or kernel address
 *		offset		file offset for the transfer
 *		io_flg		I/O flags; IO_APPEND suppresses FOF_OFFSET
 *		aresid		if non-NULL, receives the residual count;
 *				if NULL, a short transfer becomes EIO
 *
 * Returns:	0		Success
 *		EINVAL		fd is not a vnode, pipe, or socket
 *		EBADF		fd not open for the requested direction
 *		EIO		short transfer with aresid == NULL
 *	fp_lookup:EBADF		Bad file descriptor
 *	fo_read/fo_write:???
 */
int
fd_rdwr(
	int fd,
	enum uio_rw rw,
	uint64_t base,
	int64_t len,
	enum uio_seg segflg,
	off_t   offset,
	int     io_flg,
	int64_t *aresid)
{
	struct fileproc *fp;
	proc_t  p;
	int error = 0;
	int flags = 0;
	int spacetype;
	uio_t auio = NULL;
	uio_stackbuf_t uio_buf[UIO_SIZEOF(1)];
	struct vfs_context context = *(vfs_context_current());

	p = current_proc();

	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		return error;
	}

	/* Only vnodes, pipes and sockets support this interface. */
	switch (FILEGLOB_DTYPE(fp->fp_glob)) {
	case DTYPE_VNODE:
	case DTYPE_PIPE:
	case DTYPE_SOCKET:
		break;
	default:
		error = EINVAL;
		goto out;
	}
	/* The fd must have been opened for the requested direction. */
	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
		error = EBADF;
		goto out;
	}

	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
		error = EBADF;
		goto out;
	}

	/* Perform the I/O with the credentials the file was opened with. */
	context.vc_ucred = fp->fp_glob->fg_cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	} else {
		spacetype = UIO_SYSSPACE;
	}

	auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));

	uio_addiov(auio, (user_addr_t)base, (user_size_t)len);

	/* Unless appending, the I/O happens at the caller-supplied offset. */
	if (!(io_flg & IO_APPEND)) {
		flags = FOF_OFFSET;
	}

	if (rw == UIO_WRITE) {
		user_ssize_t orig_resid = uio_resid(auio);
		error = fo_write(fp, auio, flags, &context);
		/* Any bytes consumed means the file was modified. */
		if (uio_resid(auio) < orig_resid) {
			os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
		}
	} else {
		error = fo_read(fp, auio, flags, &context);
	}

	/* Report the residual count, or turn a silent short transfer into EIO. */
	if (aresid) {
		*aresid = uio_resid(auio);
	} else if (uio_resid(auio) && error == 0) {
		error = EIO;
	}
out:
	fp_drop(p, fd, fp, 0);
	return error;
}
2269 
2270 
2271 #pragma mark syscalls
2272 
2273 #ifndef HFS_GET_BOOT_INFO
2274 #define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
2275 #endif
2276 
2277 #ifndef HFS_SET_BOOT_INFO
2278 #define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
2279 #endif
2280 
2281 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
2282 #define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
2283 #endif
2284 
2285 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
2286 	        (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
2287 	        (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
2288 	        ? 1 : 0)
2289 
2290 /*
2291  * sys_getdtablesize
2292  *
2293  * Description:	Returns the per process maximum size of the descriptor table
2294  *
2295  * Parameters:	p				Process being queried
2296  *		retval				Pointer to the call return area
2297  *
2298  * Returns:	0				Success
2299  *
2300  * Implicit returns:
2301  *		*retval (modified)		Size of dtable
2302  */
int
sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
{
	/* Report the process's current open-file limit (per proc_limitgetcur_nofile). */
	*retval = proc_limitgetcur_nofile(p);
	return 0;
}
2309 
2310 
2311 /*
2312  * check_file_seek_range
2313  *
2314  * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
2315  *
2316  * Parameters:  fl		Flock structure.
2317  *		cur_file_offset	Current offset in the file.
2318  *
2319  * Returns:     0               on Success.
2320  *		EOVERFLOW	on overflow.
2321  *		EINVAL          on offset less than zero.
2322  */
2323 
/*
 * Helper: would the signed 64-bit addition x + y overflow?
 * (Same predicate as CHECK_ADD_OVERFLOW_INT64L.)
 */
static int
seek_add_overflows(long long x, long long y)
{
	if ((x > 0) && (y > 0) && (x > LLONG_MAX - y)) {
		return 1;
	}
	if ((x < 0) && (y < 0) && (x < LLONG_MIN - y)) {
		return 1;
	}
	return 0;
}

/*
 * check_file_seek_range
 *
 * Description: Checks that the byte range described by *fl, once resolved
 *		against the current file offset, lies within [0, LLONG_MAX].
 *
 * Parameters:  fl		Flock structure.
 *		cur_file_offset	Current offset in the file (used only for
 *				SEEK_CUR relative ranges).
 *
 * Returns:     0               on Success.
 *		EOVERFLOW	if a marker would exceed LLONG_MAX.
 *		EINVAL          if a marker resolves to a negative offset.
 */
static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	const off_t start = fl->l_start;
	const off_t len = fl->l_len;

	switch (fl->l_whence) {
	case SEEK_CUR:
		/* Start marker past LLONG_MAX?  (A negative start that
		 * overflows downward is invalid rather than overflowed.) */
		if (seek_add_overflows(start, cur_file_offset)) {
			return (start < 0) ? EINVAL : EOVERFLOW;
		}
		/* Start marker before the beginning of the file? */
		if (start + cur_file_offset < 0) {
			return EINVAL;
		}
		/* End marker past LLONG_MAX? */
		if (len > 0 &&
		    seek_add_overflows(start + cur_file_offset, len - 1)) {
			return EOVERFLOW;
		}
		/* End marker before the beginning of the file? */
		if (len <= 0 && start + cur_file_offset + len < 0) {
			return EINVAL;
		}
		break;

	case SEEK_SET:
		/* Start marker before the beginning of the file? */
		if (start < 0) {
			return EINVAL;
		}
		/* End marker past LLONG_MAX? */
		if (len > 0 && seek_add_overflows(start, len - 1)) {
			return EOVERFLOW;
		}
		/* End marker before the beginning of the file? */
		if (len < 0 && start + len < 0) {
			return EINVAL;
		}
		break;

	default:
		/* Other whence values (e.g. SEEK_END) are not range-checked here. */
		break;
	}
	return 0;
}
2367 
2368 
2369 /*
2370  * sys_dup
2371  *
2372  * Description:	Duplicate a file descriptor.
2373  *
2374  * Parameters:	p				Process performing the dup
2375  *		uap->fd				The fd to dup
2376  *		retval				Pointer to the call return area
2377  *
2378  * Returns:	0				Success
2379  *		!0				Errno
2380  *
2381  * Implicit returns:
2382  *		*retval (modified)		The new descriptor
2383  */
int
sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
{
	struct filedesc *fdp = &p->p_fd;
	int old = uap->fd;
	int new, error;
	struct fileproc *fp;

	proc_fdlock(p);
	/* Take an iocount on the source fd (fdlock held throughout). */
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors may forbid dup; deliver the guard exception. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* Reserve the lowest available descriptor slot for the copy. */
	if ((error = fdalloc(p, 0, &new))) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	/*
	 * NOTE(review): fp is dereferenced here after fp_drop() and
	 * proc_fdunlock(); this appears to rely on "old" remaining open
	 * to keep the fileproc alive — confirm against fd lifecycle rules.
	 */
	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
	}

	return error;
}
2419 
2420 /*
2421  * sys_dup2
2422  *
2423  * Description:	Duplicate a file descriptor to a particular value.
2424  *
2425  * Parameters:	p				Process performing the dup
2426  *		uap->from			The fd to dup
2427  *		uap->to				The fd to dup it to
2428  *		retval				Pointer to the call return area
2429  *
2430  * Returns:	0				Success
2431  *		!0				Errno
2432  *
2433  * Implicit returns:
2434  *		*retval (modified)		The new descriptor
2435  */
2436 int
sys_dup2(proc_t p,struct dup2_args * uap,int32_t * retval)2437 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2438 {
2439 	return dup2(p, uap->from, uap->to, retval);
2440 }
2441 
int
dup2(proc_t p, int old, int new, int *retval)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;

	proc_fdlock(p);

startover:
	/* Take an iocount on the source descriptor (fdlock held). */
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors may forbid dup; deliver the guard exception. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* The target must lie below the process's open-file limit. */
	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* dup2(fd, fd) is a no-op that just returns fd. */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* Target slot doesn't exist yet: grow the table to reach it. */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		if (new != i) {
			/* fdalloc() handed back a different slot; release it
			 * and fall into the close-and-reserve path for "new". */
			fdrelse(p, i);
			goto closeit;
		}
	} else {
		/* NB: "closeit" is also reached via goto from the branch above. */
closeit:
		/* Someone else holds "new" reserved: wait for it, then retry
		 * the whole lookup from scratch. */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* Refuse to implicitly close a close-guarded descriptor. */
			if (fp_isguarded(nfp, GUARD_CLOSE)) {
				fp_drop(p, old, fp, 1);
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/* Close the current occupant of "new"; FD_DUP2RESV keeps
			 * the slot reserved for us.  This unlocks the fdlock, so
			 * retake it afterwards. */
			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			/* Empty slot: just reserve it for ourselves. */
			procfdtbl_reservefd(p, new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	/* Point "new" at the same open file as "old". */
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
2528 
2529 
2530 /*
2531  * fcntl
2532  *
2533  * Description:	The file control system call.
2534  *
2535  * Parameters:	p				Process performing the fcntl
2536  *		uap->fd				The fd to operate against
2537  *		uap->cmd			The command to perform
2538  *		uap->arg			Pointer to the command argument
2539  *		retval				Pointer to the call return area
2540  *
2541  * Returns:	0				Success
2542  *		!0				Errno (see fcntl_nocancel)
2543  *
2544  * Implicit returns:
2545  *		*retval (modified)		fcntl return value (if any)
2546  *
2547  * Notes:	This system call differs from fcntl_nocancel() in that it
2548  *		tests for cancellation prior to performing a potentially
2549  *		blocking operation.
2550  */
int
sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
{
	/* Cancellation point: test for pending cancellation before the
	 * potentially blocking work in sys_fcntl_nocancel(). */
	__pthread_testcancel(1);
	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
}
2557 
2558 #define ACCOUNT_OPENFROM_ENTITLEMENT \
2559 	"com.apple.private.vfs.role-account-openfrom"
2560 
2561 /*
2562  * sys_fcntl_nocancel
2563  *
2564  * Description:	A non-cancel-testing file control system call.
2565  *
2566  * Parameters:	p				Process performing the fcntl
2567  *		uap->fd				The fd to operate against
2568  *		uap->cmd			The command to perform
2569  *		uap->arg			Pointer to the command argument
2570  *		retval				Pointer to the call return area
2571  *
2572  * Returns:	0				Success
2573  *		EINVAL
2574  *	fp_lookup:EBADF				Bad file descriptor
2575  * [F_DUPFD]
2576  *	fdalloc:EMFILE
2577  *	fdalloc:ENOMEM
2578  *	finishdup:EBADF
2579  *	finishdup:ENOMEM
2580  * [F_SETOWN]
2581  *		ESRCH
2582  * [F_SETLK]
2583  *		EBADF
2584  *		EOVERFLOW
2585  *	copyin:EFAULT
2586  *	vnode_getwithref:???
2587  *	VNOP_ADVLOCK:???
2588  *	msleep:ETIMEDOUT
2589  * [F_GETLK]
2590  *		EBADF
2591  *		EOVERFLOW
2592  *	copyin:EFAULT
2593  *	copyout:EFAULT
2594  *	vnode_getwithref:???
2595  *	VNOP_ADVLOCK:???
2596  * [F_PREALLOCATE]
2597  *		EBADF
2598  *		EINVAL
2599  *	copyin:EFAULT
2600  *	copyout:EFAULT
2601  *	vnode_getwithref:???
2602  *	VNOP_ALLOCATE:???
2603  * [F_SETSIZE,F_RDADVISE]
2604  *		EBADF
2605  *		EINVAL
2606  *	copyin:EFAULT
2607  *	vnode_getwithref:???
2608  * [F_RDAHEAD,F_NOCACHE]
2609  *		EBADF
2610  *	vnode_getwithref:???
2611  * [???]
2612  *
2613  * Implicit returns:
2614  *		*retval (modified)		fcntl return value (if any)
2615  */
2616 #define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
2617 	struct vfs_context context = { \
2618 	    .vc_thread = current_thread(), \
2619 	    .vc_ucred = fp->f_cred, \
2620 	}
2621 
2622 static user_addr_t
sys_fnctl_parse_arg(proc_t p,user_long_t arg)2623 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2624 {
2625 	/*
2626 	 * Since the arg parameter is defined as a long but may be
2627 	 * either a long or a pointer we must take care to handle
2628 	 * sign extension issues.  Our sys call munger will sign
2629 	 * extend a long when we are called from a 32-bit process.
2630 	 * Since we can never have an address greater than 32-bits
2631 	 * from a 32-bit process we lop off the top 32-bits to avoid
2632 	 * getting the wrong address
2633 	 */
2634 	return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2635 }
2636 
/* Cleanup code common to fcntl handlers, for when the fdlock is still held:
 * drops the fp_lookup() reference, releases the fdlock, passes error through. */
static int
sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
{
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
2645 
/* Cleanup code common to fcntl handlers acting on vnodes, once they have
 * unlocked the fdlock: audits the vnode path and drops the fp_lookup()
 * reference (locked == 0). */
static int
sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
{
#pragma unused(vp)

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	fp_drop(p, fd, fp, 0);
	return error;
}
2656 
2657 typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
2658     struct fileproc *fp, int32_t *retval);
2659 
2660 typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
2661     user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2662 
/*
 * SPI (private) for opening a file starting from a dir fd: looks up
 * fopen.o_pathname relative to the directory vnode "vp" backing "fd",
 * then opens it via open1().
 *
 * Entry: called with the proc fdlock held and an fp_lookup() reference
 * on fp; both are released before returning.
 *
 * Note: do not inline to keep stack usage under control.
 */
__attribute__((noinline))
static int
sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
    struct fileproc *fp, struct vnode *vp, int32_t *retval)
{
#pragma unused(cmd)

	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
	struct user_fopenfrom fopen;
	struct vnode_attr *va;
	struct nameidata *nd;
	int error, cmode;
	bool has_entitlement;

	/* The directory fd must at least be open for reading. */
	if ((fp->f_flag & FREAD) == 0) {
		return sys_fcntl_out(p, fd, fp, EBADF);
	}
	proc_fdunlock(p);

	if (vnode_getwithref(vp)) {
		error = ENOENT;
		goto outdrop;
	}

	/* Only valid for directories */
	if (vp->v_type != VDIR) {
		vnode_put(vp);
		error = ENOTDIR;
		goto outdrop;
	}

	/*
	 * Only entitled apps may use the credentials of the thread
	 * that opened the file descriptor.
	 * Non-entitled threads will use their own context.
	 */
	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);

	/* Get flags, mode and pathname arguments. */
	if (IS_64BIT_PROCESS(p)) {
		error = copyin(argp, &fopen, sizeof(fopen));
	} else {
		/* 32-bit layout differs: widen the pathname to a user_addr_t. */
		struct user32_fopenfrom fopen32;

		error = copyin(argp, &fopen32, sizeof(fopen32));
		fopen.o_flags = fopen32.o_flags;
		fopen.o_mode = fopen32.o_mode;
		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
	}
	if (error) {
		vnode_put(vp);
		goto outdrop;
	}

	/* open1() can have really deep stacks, so allocate those */
	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	AUDIT_ARG(fflags, fopen.o_flags);
	AUDIT_ARG(mode, fopen.o_mode);
	VATTR_INIT(va);
	/* Mask off all but regular access permissions */
	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);

	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);

	/* Start the lookup relative to the file descriptor's vnode. */
	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
	nd->ni_dvp = vp;

	error = open1(has_entitlement ? &context : vfs_context_current(),
	    nd, fopen.o_flags, va, NULL, NULL, retval);

	kfree_type(struct vnode_attr, va);
	kfree_type(struct nameidata, nd);

	vnode_put(vp);

outdrop:
	return sys_fcntl_outdrop(p, fd, fp, vp, error);
}
2752 
2753 int
sys_fcntl_nocancel(proc_t p,struct fcntl_nocancel_args * uap,int32_t * retval)2754 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2755 {
2756 	int fd = uap->fd;
2757 	int cmd = uap->cmd;
2758 	struct filedesc *fdp = &p->p_fd;
2759 	struct fileproc *fp;
2760 	struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
2761 	unsigned int oflags, nflags;
2762 	int i, tmp, error, error2, flg = 0;
2763 	struct flock fl = {};
2764 	struct flocktimeout fltimeout;
2765 	struct timespec *timeout = NULL;
2766 	off_t offset;
2767 	int newmin;
2768 	daddr64_t lbn, bn;
2769 	unsigned int fflag;
2770 	user_addr_t argp;
2771 	boolean_t is64bit;
2772 	int has_entitlement = 0;
2773 
2774 	AUDIT_ARG(fd, uap->fd);
2775 	AUDIT_ARG(cmd, uap->cmd);
2776 
2777 	proc_fdlock(p);
2778 	if ((error = fp_lookup(p, fd, &fp, 1))) {
2779 		proc_fdunlock(p);
2780 		return error;
2781 	}
2782 
2783 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2784 
2785 	is64bit = proc_is64bit(p);
2786 	if (is64bit) {
2787 		argp = uap->arg;
2788 	} else {
2789 		/*
2790 		 * Since the arg parameter is defined as a long but may be
2791 		 * either a long or a pointer we must take care to handle
2792 		 * sign extension issues.  Our sys call munger will sign
2793 		 * extend a long when we are called from a 32-bit process.
2794 		 * Since we can never have an address greater than 32-bits
2795 		 * from a 32-bit process we lop off the top 32-bits to avoid
2796 		 * getting the wrong address
2797 		 */
2798 		argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2799 	}
2800 
2801 #if CONFIG_MACF
2802 	error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2803 	if (error) {
2804 		goto out;
2805 	}
2806 #endif
2807 
2808 	switch (cmd) {
2809 	case F_DUPFD:
2810 	case F_DUPFD_CLOEXEC:
2811 		if (fp_isguarded(fp, GUARD_DUP)) {
2812 			error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2813 			goto out;
2814 		}
2815 		newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2816 		AUDIT_ARG(value32, newmin);
2817 		if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2818 			error = EINVAL;
2819 			goto out;
2820 		}
2821 		if ((error = fdalloc(p, newmin, &i))) {
2822 			goto out;
2823 		}
2824 		error = finishdup(p, fdp, fd, i,
2825 		    cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2826 		goto out;
2827 
2828 	case F_GETFD:
2829 		*retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2830 		error = 0;
2831 		goto out;
2832 
2833 	case F_SETFD:
2834 		AUDIT_ARG(value32, (uint32_t)uap->arg);
2835 		if (uap->arg & FD_CLOEXEC) {
2836 			fp->fp_flags |= FP_CLOEXEC;
2837 			error = 0;
2838 		} else if (!fp->fp_guard_attrs) {
2839 			fp->fp_flags &= ~FP_CLOEXEC;
2840 			error = 0;
2841 		} else {
2842 			error = fp_guard_exception(p,
2843 			    fd, fp, kGUARD_EXC_NOCLOEXEC);
2844 		}
2845 		goto out;
2846 
2847 	case F_GETFL:
2848 		*retval = OFLAGS(fp->f_flag);
2849 		error = 0;
2850 		goto out;
2851 
2852 	case F_SETFL:
2853 		// FIXME (rdar://54898652)
2854 		//
2855 		// this code is broken if fnctl(F_SETFL), ioctl() are
2856 		// called concurrently for the same fileglob.
2857 
2858 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2859 		AUDIT_ARG(value32, tmp);
2860 
2861 		os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2862 			nflags  = oflags & ~FCNTLFLAGS;
2863 			nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2864 		});
2865 		tmp = nflags & FNONBLOCK;
2866 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2867 		if (error) {
2868 			goto out;
2869 		}
2870 		tmp = nflags & FASYNC;
2871 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2872 		if (!error) {
2873 			goto out;
2874 		}
2875 		os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2876 		tmp = 0;
2877 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2878 		goto out;
2879 
2880 	case F_GETOWN:
2881 		if (fp->f_type == DTYPE_SOCKET) {
2882 			*retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2883 			error = 0;
2884 			goto out;
2885 		}
2886 		error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2887 		*retval = -*retval;
2888 		goto out;
2889 
2890 	case F_SETOWN:
2891 		tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2892 		AUDIT_ARG(value32, tmp);
2893 		if (fp->f_type == DTYPE_SOCKET) {
2894 			((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2895 			error = 0;
2896 			goto out;
2897 		}
2898 		if (fp->f_type == DTYPE_PIPE) {
2899 			error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2900 			goto out;
2901 		}
2902 
2903 		if (tmp <= 0) {
2904 			tmp = -tmp;
2905 		} else {
2906 			proc_t p1 = proc_find(tmp);
2907 			if (p1 == 0) {
2908 				error = ESRCH;
2909 				goto out;
2910 			}
2911 			tmp = (int)p1->p_pgrpid;
2912 			proc_rele(p1);
2913 		}
2914 		error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2915 		goto out;
2916 
2917 	case F_SETNOSIGPIPE:
2918 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2919 		if (fp->f_type == DTYPE_SOCKET) {
2920 #if SOCKETS
2921 			error = sock_setsockopt((struct socket *)fp_get_data(fp),
2922 			    SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2923 #else
2924 			error = EINVAL;
2925 #endif
2926 		} else {
2927 			struct fileglob *fg = fp->fp_glob;
2928 
2929 			lck_mtx_lock_spin(&fg->fg_lock);
2930 			if (tmp) {
2931 				fg->fg_lflags |= FG_NOSIGPIPE;
2932 			} else {
2933 				fg->fg_lflags &= ~FG_NOSIGPIPE;
2934 			}
2935 			lck_mtx_unlock(&fg->fg_lock);
2936 			error = 0;
2937 		}
2938 		goto out;
2939 
2940 	case F_GETNOSIGPIPE:
2941 		if (fp->f_type == DTYPE_SOCKET) {
2942 #if SOCKETS
2943 			int retsize = sizeof(*retval);
2944 			error = sock_getsockopt((struct socket *)fp_get_data(fp),
2945 			    SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2946 #else
2947 			error = EINVAL;
2948 #endif
2949 		} else {
2950 			*retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2951 			    1 : 0;
2952 			error = 0;
2953 		}
2954 		goto out;
2955 
2956 	case F_SETCONFINED:
2957 		/*
2958 		 * If this is the only reference to this fglob in the process
2959 		 * and it's already marked as close-on-fork then mark it as
2960 		 * (immutably) "confined" i.e. any fd that points to it will
2961 		 * forever be close-on-fork, and attempts to use an IPC
2962 		 * mechanism to move the descriptor elsewhere will fail.
2963 		 */
2964 		if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2965 			struct fileglob *fg = fp->fp_glob;
2966 
2967 			lck_mtx_lock_spin(&fg->fg_lock);
2968 			if (fg->fg_lflags & FG_CONFINED) {
2969 				error = 0;
2970 			} else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2971 				error = EAGAIN; /* go close the dup .. */
2972 			} else if (fp->fp_flags & FP_CLOFORK) {
2973 				fg->fg_lflags |= FG_CONFINED;
2974 				error = 0;
2975 			} else {
2976 				error = EBADF;  /* open without O_CLOFORK? */
2977 			}
2978 			lck_mtx_unlock(&fg->fg_lock);
2979 		} else {
2980 			/*
2981 			 * Other subsystems may have built on the immutability
2982 			 * of FG_CONFINED; clearing it may be tricky.
2983 			 */
2984 			error = EPERM;          /* immutable */
2985 		}
2986 		goto out;
2987 
2988 	case F_GETCONFINED:
2989 		*retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2990 		error = 0;
2991 		goto out;
2992 
2993 	case F_SETLKWTIMEOUT:
2994 	case F_SETLKW:
2995 	case F_OFD_SETLKWTIMEOUT:
2996 	case F_OFD_SETLKW:
2997 		flg |= F_WAIT;
2998 		OS_FALLTHROUGH;
2999 
3000 	case F_SETLK:
3001 	case F_OFD_SETLK:
3002 		if (fp->f_type != DTYPE_VNODE) {
3003 			error = EBADF;
3004 			goto out;
3005 		}
3006 		vp = (struct vnode *)fp_get_data(fp);
3007 
3008 		fflag = fp->f_flag;
3009 		offset = fp->f_offset;
3010 		proc_fdunlock(p);
3011 
3012 		/* Copy in the lock structure */
3013 		if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3014 			error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3015 			if (error) {
3016 				goto outdrop;
3017 			}
3018 			fl = fltimeout.fl;
3019 			timeout = &fltimeout.timeout;
3020 		} else {
3021 			error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3022 			if (error) {
3023 				goto outdrop;
3024 			}
3025 		}
3026 
3027 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3028 		/* and ending byte for EOVERFLOW in SEEK_SET */
3029 		error = check_file_seek_range(&fl, offset);
3030 		if (error) {
3031 			goto outdrop;
3032 		}
3033 
3034 		if ((error = vnode_getwithref(vp))) {
3035 			goto outdrop;
3036 		}
3037 		if (fl.l_whence == SEEK_CUR) {
3038 			fl.l_start += offset;
3039 		}
3040 
3041 #if CONFIG_MACF
3042 		error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3043 		    F_SETLK, &fl);
3044 		if (error) {
3045 			(void)vnode_put(vp);
3046 			goto outdrop;
3047 		}
3048 #endif
3049 		switch (cmd) {
3050 		case F_OFD_SETLK:
3051 		case F_OFD_SETLKW:
3052 		case F_OFD_SETLKWTIMEOUT:
3053 			flg |= F_OFD_LOCK;
3054 			switch (fl.l_type) {
3055 			case F_RDLCK:
3056 				if ((fflag & FREAD) == 0) {
3057 					error = EBADF;
3058 					break;
3059 				}
3060 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3061 				    F_SETLK, &fl, flg, &context, timeout);
3062 				break;
3063 			case F_WRLCK:
3064 				if ((fflag & FWRITE) == 0) {
3065 					error = EBADF;
3066 					break;
3067 				}
3068 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3069 				    F_SETLK, &fl, flg, &context, timeout);
3070 				break;
3071 			case F_UNLCK:
3072 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3073 				    F_UNLCK, &fl, F_OFD_LOCK, &context,
3074 				    timeout);
3075 				break;
3076 			default:
3077 				error = EINVAL;
3078 				break;
3079 			}
3080 			if (0 == error &&
3081 			    (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3082 				struct fileglob *fg = fp->fp_glob;
3083 
3084 				/*
3085 				 * arrange F_UNLCK on last close (once
3086 				 * set, FG_HAS_OFDLOCK is immutable)
3087 				 */
3088 				if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3089 					lck_mtx_lock_spin(&fg->fg_lock);
3090 					fg->fg_lflags |= FG_HAS_OFDLOCK;
3091 					lck_mtx_unlock(&fg->fg_lock);
3092 				}
3093 			}
3094 			break;
3095 		default:
3096 			flg |= F_POSIX;
3097 			switch (fl.l_type) {
3098 			case F_RDLCK:
3099 				if ((fflag & FREAD) == 0) {
3100 					error = EBADF;
3101 					break;
3102 				}
3103 				// XXX UInt32 unsafe for LP64 kernel
3104 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3105 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3106 				    F_SETLK, &fl, flg, &context, timeout);
3107 				break;
3108 			case F_WRLCK:
3109 				if ((fflag & FWRITE) == 0) {
3110 					error = EBADF;
3111 					break;
3112 				}
3113 				// XXX UInt32 unsafe for LP64 kernel
3114 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3115 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3116 				    F_SETLK, &fl, flg, &context, timeout);
3117 				break;
3118 			case F_UNLCK:
3119 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3120 				    F_UNLCK, &fl, F_POSIX, &context, timeout);
3121 				break;
3122 			default:
3123 				error = EINVAL;
3124 				break;
3125 			}
3126 			break;
3127 		}
3128 		(void) vnode_put(vp);
3129 		goto outdrop;
3130 
3131 	case F_GETLK:
3132 	case F_OFD_GETLK:
3133 	case F_GETLKPID:
3134 	case F_OFD_GETLKPID:
3135 		if (fp->f_type != DTYPE_VNODE) {
3136 			error = EBADF;
3137 			goto out;
3138 		}
3139 		vp = (struct vnode *)fp_get_data(fp);
3140 
3141 		offset = fp->f_offset;
3142 		proc_fdunlock(p);
3143 
3144 		/* Copy in the lock structure */
3145 		error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3146 		if (error) {
3147 			goto outdrop;
3148 		}
3149 
3150 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3151 		/* and ending byte for EOVERFLOW in SEEK_SET */
3152 		error = check_file_seek_range(&fl, offset);
3153 		if (error) {
3154 			goto outdrop;
3155 		}
3156 
3157 		if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3158 			error = EINVAL;
3159 			goto outdrop;
3160 		}
3161 
3162 		switch (fl.l_type) {
3163 		case F_RDLCK:
3164 		case F_UNLCK:
3165 		case F_WRLCK:
3166 			break;
3167 		default:
3168 			error = EINVAL;
3169 			goto outdrop;
3170 		}
3171 
3172 		switch (fl.l_whence) {
3173 		case SEEK_CUR:
3174 		case SEEK_SET:
3175 		case SEEK_END:
3176 			break;
3177 		default:
3178 			error = EINVAL;
3179 			goto outdrop;
3180 		}
3181 
3182 		if ((error = vnode_getwithref(vp)) == 0) {
3183 			if (fl.l_whence == SEEK_CUR) {
3184 				fl.l_start += offset;
3185 			}
3186 
3187 #if CONFIG_MACF
3188 			error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3189 			    cmd, &fl);
3190 			if (error == 0)
3191 #endif
3192 			switch (cmd) {
3193 			case F_OFD_GETLK:
3194 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3195 				    F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3196 				break;
3197 			case F_OFD_GETLKPID:
3198 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3199 				    F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3200 				break;
3201 			default:
3202 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3203 				    cmd, &fl, F_POSIX, &context, NULL);
3204 				break;
3205 			}
3206 
3207 			(void)vnode_put(vp);
3208 
3209 			if (error == 0) {
3210 				error = copyout((caddr_t)&fl, argp, sizeof(fl));
3211 			}
3212 		}
3213 		goto outdrop;
3214 
	case F_PREALLOCATE: {
		/*
		 * F_PREALLOCATE: preallocate storage for the file.
		 *
		 * Copies in an fstore_t, translates F_ALLOCATE*/posmode
		 * request flags into VNOP_ALLOCATE() flags, and copies the
		 * structure back out with fst_bytesalloc set to the number
		 * of bytes actually allocated.
		 *
		 * NOTE(review): the fd table lock appears to be held on entry
		 * to each case ("goto out" path) and is dropped here via
		 * proc_fdunlock() before any blocking call — confirm against
		 * the function prologue, which is outside this view.
		 */
		fstore_t alloc_struct;    /* structure for allocate command */
		u_int32_t alloc_flags = 0;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		/* make sure that we have write permission */
		if ((fp->f_flag & FWRITE) == 0) {
			error = EBADF;
			goto outdrop;
		}

		error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
		if (error) {
			goto outdrop;
		}

		/* now set the space allocated to 0 */
		alloc_struct.fst_bytesalloc = 0;

		/*
		 * Do some simple parameter checking
		 */

		/* set up the flags */

		alloc_flags |= PREALLOCATE;

		if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
			alloc_flags |= ALLOCATECONTIG;
		}

		if (alloc_struct.fst_flags & F_ALLOCATEALL) {
			alloc_flags |= ALLOCATEALL;
		}

		/*
		 * Do any position mode specific stuff.  The only
		 * position mode  supported now is PEOFPOSMODE
		 */

		switch (alloc_struct.fst_posmode) {
		case F_PEOFPOSMODE:
			/* allocating from EOF: offset must be exactly 0 */
			if (alloc_struct.fst_offset != 0) {
				error = EINVAL;
				goto outdrop;
			}

			alloc_flags |= ALLOCATEFROMPEOF;
			break;

		case F_VOLPOSMODE:
			/* volume-relative: offset must be strictly positive */
			if (alloc_struct.fst_offset <= 0) {
				error = EINVAL;
				goto outdrop;
			}

			alloc_flags |= ALLOCATEFROMVOL;
			break;

		default: {
			error = EINVAL;
			goto outdrop;
		}
		}
		if ((error = vnode_getwithref(vp)) == 0) {
			/*
			 * call allocate to get the space
			 */
			error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
			    &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
			    &context);
			(void)vnode_put(vp);

			/*
			 * Copy out unconditionally so the caller sees
			 * fst_bytesalloc even when the allocation failed;
			 * the allocation error takes precedence over a
			 * copyout error.
			 */
			error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));

			if (error == 0) {
				error = error2;
			}
		}
		goto outdrop;
	}
	case F_PUNCHHOLE: {
		/*
		 * F_PUNCHHOLE: deallocate a region of the file.
		 *
		 * Requires FWRITE on the descriptor (EPERM otherwise), copies
		 * in an fpunchhole_t and forwards it to the filesystem via
		 * VNOP_IOCTL(F_PUNCHHOLE).  MACF write check gates the
		 * operation when CONFIG_MACF is built in.
		 */
		fpunchhole_t args;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		/* need write permissions */
		if ((fp->f_flag & FWRITE) == 0) {
			error = EPERM;
			goto outdrop;
		}

		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
			goto outdrop;
		}

		/* take an iocount before calling into the filesystem */
		if ((error = vnode_getwithref(vp))) {
			goto outdrop;
		}

#if CONFIG_MACF
		if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
			(void)vnode_put(vp);
			goto outdrop;
		}
#endif

		error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
		(void)vnode_put(vp);

		goto outdrop;
	}
	case F_TRIM_ACTIVE_FILE: {
		/*
		 * F_TRIM_ACTIVE_FILE: trim storage from an open (active) file.
		 *
		 * Privileged: requires PRIV_TRIM_ACTIVE_FILE on the caller's
		 * credential and FWRITE on the descriptor.  The request is
		 * passed through to the filesystem as a VNOP_IOCTL.
		 */
		ftrimactivefile_t args;

		if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
			error = EACCES;
			goto out;
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		/* need write permissions */
		if ((fp->f_flag & FWRITE) == 0) {
			error = EPERM;
			goto outdrop;
		}

		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
			goto outdrop;
		}

		if ((error = vnode_getwithref(vp))) {
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
		(void)vnode_put(vp);

		goto outdrop;
	}
	case F_SPECULATIVE_READ: {
		/*
		 * F_SPECULATIVE_READ: hint the filesystem to speculatively
		 * read a byte range of the file.
		 *
		 * The user-supplied (offset, length) pair is validated for
		 * negative values, the offset is rounded down to a page
		 * boundary (with the length grown to compensate), and both
		 * additions are overflow-checked before handing the request
		 * to VNOP_IOCTL.
		 */
		fspecread_t args;
		off_t temp_length = 0;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
			goto outdrop;
		}

		/* Discard invalid offsets or lengths */
		if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
			error = EINVAL;
			goto outdrop;
		}

		/*
		 * Round the file offset down to a page-size boundary (or to 0).
		 * The filesystem will need to round the length up to the end of the page boundary
		 * or to the EOF of the file.
		 */
		uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
		uint64_t foff_delta = args.fsr_offset - foff;
		args.fsr_offset = (off_t) foff;

		/*
		 * Now add in the delta to the supplied length. Since we may have adjusted the
		 * offset, increase it by the amount that we adjusted.
		 */
		if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
			error = EOVERFLOW;
			goto outdrop;
		}

		/*
		 * Make sure (fsr_offset + fsr_length) does not overflow.
		 */
		if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
			error = EOVERFLOW;
			goto outdrop;
		}

		if ((error = vnode_getwithref(vp))) {
			goto outdrop;
		}
		error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
		(void)vnode_put(vp);

		goto outdrop;
	}
	case F_SETSIZE:
		/*
		 * F_SETSIZE: set the file size, possibly without zero-filling
		 * newly exposed bytes.
		 *
		 * Only the superuser may use this; callers additionally
		 * holding PRIV_VFS_SETSIZE get IO_NOZEROFILL semantics, all
		 * others get a normal (zero-filling) vnode_setsize().
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
		if (error) {
			goto outdrop;
		}
		AUDIT_ARG(value64, offset);

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

#if CONFIG_MACF
		error = mac_vnode_check_truncate(&context,
		    fp->fp_glob->fg_cred, vp);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
#endif
		/*
		 * Make sure that we are root.  Growing a file
		 * without zero filling the data is a security hole.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			error = EACCES;
		} else {
			/*
			 * Require privilege to change file size without zerofill,
			 * else will change the file size and zerofill it.
			 */
			error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
			if (error == 0) {
				error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
			} else {
				error = vnode_setsize(vp, offset, 0, &context);
			}

#if CONFIG_MACF
			if (error == 0) {
				mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
			}
#endif
		}

		(void)vnode_put(vp);
		goto outdrop;
3485 
	case F_RDAHEAD:
		/*
		 * F_RDAHEAD / F_NOCACHE / F_NODIRECT / F_SINGLE_WRITER:
		 * per-open-file flag toggles on fp_glob->fg_flag, set or
		 * cleared atomically according to uap->arg.  No vnode
		 * operation is involved, so these return via "out" with the
		 * fd lock still in the state the case was entered with.
		 *
		 * Note F_RDAHEAD's sense is inverted relative to the others:
		 * a non-zero arg *clears* FNORDAHEAD (enables read-ahead).
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
		} else {
			os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
		}
		goto out;

	case F_NOCACHE:
		/* arg != 0 disables caching for this open file */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
		}
		goto out;

	case F_NODIRECT:
		/* arg != 0 sets FNODIRECT for this open file */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
		}
		goto out;

	case F_SINGLE_WRITER:
		/* arg != 0 sets FSINGLE_WRITER for this open file */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
		}
		goto out;
3533 
	case F_GLOBAL_NOCACHE:
		/*
		 * F_GLOBAL_NOCACHE: set/clear the no-cache state on the vnode
		 * itself (affects all opens, unlike F_NOCACHE above).  The
		 * previous state is returned to the caller via *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			/* report the prior state, then apply the new one */
			*retval = vnode_isnocache(vp);

			if (uap->arg) {
				vnode_setnocache(vp);
			} else {
				vnode_clearnocache(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;

	case F_CHECK_OPENEVT:
		/*
		 * F_CHECK_OPENEVT: same pattern as F_GLOBAL_NOCACHE, but for
		 * the vnode's "open event" flag; prior state goes to *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			*retval = vnode_is_openevt(vp);

			if (uap->arg) {
				vnode_set_openevt(vp);
			} else {
				vnode_clear_openevt(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;
3575 
	case F_RDADVISE: {
		/*
		 * F_RDADVISE: issue a read-ahead advisory for a byte range.
		 * Copies in a struct radvisory, rejects negative offset or
		 * count, and forwards the request to the filesystem.
		 */
		struct radvisory ra_struct;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
			goto outdrop;
		}
		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
			error = EINVAL;
			goto outdrop;
		}
		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;
	}
3600 
	case F_FLUSH_DATA:
		/*
		 * F_FLUSH_DATA: kick off an asynchronous fsync of the file's
		 * data (MNT_NOWAIT — does not wait for completion).
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;
3616 
	case F_LOG2PHYS:
	case F_LOG2PHYS_EXT: {
		/*
		 * F_LOG2PHYS / F_LOG2PHYS_EXT: translate a logical file
		 * offset to a physical device offset.
		 *
		 * F_LOG2PHYS uses the descriptor's current f_offset (read
		 * while the fd lock is still held); F_LOG2PHYS_EXT takes the
		 * offset (and a requested contiguous-byte count) from the
		 * user's struct log2phys.  The offset is converted to a
		 * block and back (VNOP_OFFTOBLK/VNOP_BLKTOOFF) to find the
		 * containing block's start, then VNOP_BLOCKMAP maps it to a
		 * device block number.
		 */
		struct log2phys l2p_struct = {};    /* structure for allocate command */
		int devBlockSize;

		off_t file_offset = 0;
		size_t a_size = 0;
		size_t run = 0;

		if (cmd == F_LOG2PHYS_EXT) {
			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
			if (error) {
				goto out;
			}
			file_offset = l2p_struct.l2p_devoffset;
		} else {
			file_offset = fp->f_offset;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);
		if ((error = vnode_getwithref(vp))) {
			goto outdrop;
		}
		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		error = VNOP_BLKTOOFF(vp, lbn, &offset);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		devBlockSize = vfs_devblocksize(vnode_mount(vp));
		if (cmd == F_LOG2PHYS_EXT) {
			if (l2p_struct.l2p_contigbytes < 0) {
				vnode_put(vp);
				error = EINVAL;
				goto outdrop;
			}

			/* clamp the (non-negative) request to size_t range */
			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
		} else {
			a_size = devBlockSize;
		}

		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);

		(void)vnode_put(vp);

		if (!error) {
			l2p_struct.l2p_flags = 0;       /* for now */
			if (cmd == F_LOG2PHYS_EXT) {
				/* subtract the sub-block remainder already consumed */
				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
			} else {
				l2p_struct.l2p_contigbytes = 0; /* for now */
			}

			/*
			 * The block number being -1 suggests that the file offset is not backed
			 * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
			 */
			if (bn == -1) {
				/* Don't multiply it by the block size */
				l2p_struct.l2p_devoffset = bn;
			} else {
				l2p_struct.l2p_devoffset = bn * devBlockSize;
				l2p_struct.l2p_devoffset += file_offset - offset;
			}
			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
		}
		goto outdrop;
	}
	case F_GETPATH:
	case F_GETPATH_NOFIRMLINK: {
		/*
		 * F_GETPATH / F_GETPATH_NOFIRMLINK: return the path of the
		 * file into the caller's buffer.  The NOFIRMLINK variant
		 * asks vn_getpath_ext() to resolve past firmlinks.  The
		 * MAXPATHLEN scratch buffer comes from the ZV_NAMEI zone
		 * and is freed on every path.
		 */
		char *pathbufp;
		int pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);

		if ((error = vnode_getwithref(vp)) == 0) {
			if (cmd == F_GETPATH_NOFIRMLINK) {
				error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
			} else {
				error = vn_getpath(vp, pathbufp, &pathlen);
			}
			(void)vnode_put(vp);

			if (error == 0) {
				/* pathlen includes the NUL on success */
				error = copyout((caddr_t)pathbufp, argp, pathlen);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}
3724 
	case F_PATHPKG_CHECK: {
		/*
		 * F_PATHPKG_CHECK: copy in a path string from the caller and
		 * ask the VFS layer whether this vnode lies inside that
		 * package path; result is returned through *retval.
		 */
		char *pathbufp;
		size_t pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);

		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
			if ((error = vnode_getwithref(vp)) == 0) {
				AUDIT_ARG(text, pathbufp);
				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);

				(void)vnode_put(vp);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}
3750 
	case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
	case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
	case F_BARRIERFSYNC:  // fsync + barrier
	case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
	case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
		/*
		 * These five commands take no argument and are simply passed
		 * through to the filesystem as a VNOP_IOCTL on the vnode.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);

			(void)vnode_put(vp);
		}
		break;
	}
3770 
3771 	/*
3772 	 * SPI (private) for opening a file starting from a dir fd
3773 	 */
	case F_OPENFROM: {
		/*
		 * F_OPENFROM: open a file relative to this directory fd.
		 * All of the work (argument copyin, lookup, open) is done by
		 * sys_fcntl__OPENFROM(), which is tail-called here and takes
		 * over responsibility for dropping fp and the fd lock.
		 */
		/* Check if this isn't a valid file descriptor */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
	}
3784 
3785 	/*
3786 	 * SPI (private) for unlinking a file starting from a dir fd
3787 	 */
	case F_UNLINKFROM: {
		/*
		 * F_UNLINKFROM: unlink a path relative to this directory fd.
		 *
		 * The fd must reference a readable directory vnode.  Threads
		 * holding the ACCOUNT_OPENFROM_ENTITLEMENT use the context
		 * captured earlier ("&context"); others use their current
		 * vfs context.  unlink1() performs the user-space path
		 * lookup and the actual unlink.
		 */
		user_addr_t pathname;

		/* Check if this isn't a valid file descriptor */
		if ((fp->f_type != DTYPE_VNODE) ||
		    (fp->f_flag & FREAD) == 0) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only valid for directories */
		if (vp->v_type != VDIR) {
			vnode_put(vp);
			error = ENOTDIR;
			goto outdrop;
		}

		/*
		 * Only entitled apps may use the credentials of the thread
		 * that opened the file descriptor.
		 * Non-entitled threads will use their own context.
		 */
		if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
			has_entitlement = 1;
		}

		/* Get flags, mode and pathname arguments. */
		if (IS_64BIT_PROCESS(p)) {
			pathname = (user_addr_t)argp;
		} else {
			pathname = CAST_USER_ADDR_T(argp);
		}

		/* Start the lookup relative to the file descriptor's vnode. */
		error = unlink1(has_entitlement ? &context : vfs_context_current(),
		    vp, pathname, UIO_USERSPACE, 0);

		vnode_put(vp);
		break;
	}
3835 
	case F_ADDSIGS:
	case F_ADDFILESIGS:
	case F_ADDFILESIGS_FOR_DYLD_SIM:
	case F_ADDFILESIGS_RETURN:
	case F_ADDFILESIGS_INFO:
	{
		/*
		 * F_ADDSIGS family: attach a code-signature blob to the file.
		 *
		 * The blob either comes from user memory (F_ADDSIGS: copyin)
		 * or is read from the file itself at the given offset (the
		 * F_ADDFILESIGS* variants: vn_rdwr).  The kernel copy is
		 * registered with UBC via ubc_cs_blob_add(), which consumes
		 * the buffer on success.  The *_RETURN/_FOR_DYLD_SIM/_INFO
		 * variants additionally copy results back to user space.
		 *
		 * sizeof_fs: only the input portion of user_fsignatures is
		 * copied in; _INFO reads up to fs_cdhash (first output
		 * field), older commands up to fs_fsignatures_size (compat).
		 */
		struct cs_blob *blob = NULL;
		struct user_fsignatures fs;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;
		int blob_add_flags = 0;
		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
			/* dyld_sim processes must die on signature invalidation */
			if ((proc_getcsflags(p) & CS_KILL) == 0) {
				proc_lock(p);
				proc_csflags_set(p, CS_KILL);
				proc_unlock(p);
			}
		}

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &fs, sizeof_fs);
		} else {
			/* _INFO has no 32-bit layout; reject it outright */
			if (cmd == F_ADDFILESIGS_INFO) {
				error = EINVAL;
				vnode_put(vp);
				goto outdrop;
			}

			struct user32_fsignatures fs32;

			error = copyin(argp, &fs32, sizeof(fs32));
			fs.fs_file_start = fs32.fs_file_start;
			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
			fs.fs_blob_size = fs32.fs_blob_size;
		}

		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		/*
		 * First check if we have something loaded at this offset
		 */
		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
		if (blob != NULL) {
			/* If this is for dyld_sim revalidate the blob */
			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
				if (error) {
					blob = NULL;
					/* EAGAIN means: reload the blob below */
					if (error != EAGAIN) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		if (blob == NULL) {
			/*
			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
			 * our use cases for the immediate future, but note that at the time of this commit, some
			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
			 *
			 * We should consider how we can manage this more effectively; the above means that some
			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
			 * threshold considered ridiculous at the time of this change.
			 */
#define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
				error = E2BIG;
				vnode_put(vp);
				goto outdrop;
			}

			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
				error = ENOMEM;
				vnode_put(vp);
				goto outdrop;
			}

			if (cmd == F_ADDSIGS) {
				/* blob provided directly in user memory */
				error = copyin(fs.fs_blob_start,
				    (void *) kernel_blob_addr,
				    fs.fs_blob_size);
			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
				int resid;

				/* blob read from the file itself */
				error = vn_rdwr(UIO_READ,
				    vp,
				    (caddr_t) kernel_blob_addr,
				    (int)kernel_blob_size,
				    fs.fs_file_start + fs.fs_blob_start,
				    UIO_SYSSPACE,
				    0,
				    kauth_cred_get(),
				    &resid,
				    p);
				if ((error == 0) && resid) {
					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
				}
			}

			if (error) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
				vnode_put(vp);
				goto outdrop;
			}

			blob = NULL;
			error = ubc_cs_blob_add(vp,
			    proc_platform(p),
			    CPU_TYPE_ANY,                       /* not for a specific architecture */
			    CPU_SUBTYPE_ANY,
			    fs.fs_file_start,
			    &kernel_blob_addr,
			    kernel_blob_size,
			    NULL,
			    blob_add_flags,
			    &blob);

			/* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
			if (error) {
				if (kernel_blob_addr) {
					ubc_cs_blob_deallocate(kernel_blob_addr,
					    kernel_blob_size);
				}
				vnode_put(vp);
				goto outdrop;
			} else {
#if CHECK_CS_VALIDATION_BITMAP
				ubc_cs_validation_bitmap_allocate( vp );
#endif
			}
		}

		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
		    cmd == F_ADDFILESIGS_INFO) {
			/*
			 * The first element of the structure is a
			 * off_t that happen to have the same size for
			 * all archs. Lets overwrite that.
			 */
			off_t end_offset = 0;
			if (blob) {
				end_offset = blob->csb_end_offset;
			}
			error = copyout(&end_offset, argp, sizeof(end_offset));

			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
		}

		if (cmd == F_ADDFILESIGS_INFO) {
			/* Return information. What we copy out depends on the size of the
			 * passed in structure, to keep binary compatibility. */

			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
				// enough room for fs_cdhash[20]+fs_hash_type

				if (blob != NULL) {
					error = copyout(blob->csb_cdhash,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
					    USER_FSIGNATURES_CDHASH_LEN);
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
					int hashtype = cs_hash_type(blob->csb_hashtype);
					error = copyout(&hashtype,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
					    sizeof(int));
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		(void) vnode_put(vp);
		break;
	}
#if CONFIG_SUPPLEMENTAL_SIGNATURES
	case F_ADDFILESUPPL:
	{
		/*
		 * F_ADDFILESUPPL: attach a supplemental code signature to the
		 * original file identified by fs.fs_orig_fd, reading the blob
		 * from this fd's file.  64-bit processes only.
		 *
		 * Two vnodes are in play: vp (this fd, the supplement source)
		 * and ivp (the original file).  Both iocounts plus the
		 * orig_fp reference must be released on every exit path —
		 * the "dropboth" label below does exactly that before
		 * falling through to outdrop.
		 */
		struct vnode *ivp;
		struct cs_blob *blob = NULL;
		struct user_fsupplement fs;
		int orig_fd;
		struct fileproc* orig_fp = NULL;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;

		if (!IS_64BIT_PROCESS(p)) {
			error = EINVAL;
			goto out; // drop fp and unlock fds
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		/* NOTE: copyin while the fd lock is still held (goto out path) */
		error = copyin(argp, &fs, sizeof(fs));
		if (error) {
			goto out;
		}

		orig_fd = fs.fs_orig_fd;
		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
			goto out;
		}

		if (orig_fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			fp_drop(p, orig_fd, orig_fp, 1);
			goto out;
		}

		ivp = (struct vnode *)fp_get_data(orig_fp);

		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		error = vnode_getwithref(ivp);
		if (error) {
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop; //drop fp
		}

		error = vnode_getwithref(vp);
		if (error) {
			vnode_put(ivp);
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop;
		}

		/* same cap as the F_ADDSIGS family above */
		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
			error = E2BIG;
			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
		}

		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
			goto dropboth;
		}

		int resid;
		/* read the supplemental blob out of this fd's file */
		error = vn_rdwr(UIO_READ, vp,
		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
		    fs.fs_file_start + fs.fs_blob_start,
		    UIO_SYSSPACE, 0,
		    kauth_cred_get(), &resid, p);
		if ((error == 0) && resid) {
			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
		}

		if (error) {
			ubc_cs_blob_deallocate(kernel_blob_addr,
			    kernel_blob_size);
			goto dropboth;
		}

		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
		    &kernel_blob_addr, kernel_blob_size, &blob);

		/* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
		if (error) {
			if (kernel_blob_addr) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
			}
			goto dropboth;
		}
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		break;

dropboth:
		/* common error unwind: both iocounts + orig_fp, then outdrop */
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		goto outdrop;
	}
#endif
	case F_GETCODEDIR:
	case F_FINDSIGS: {
		/* Not implemented; reserved command numbers. */
		error = ENOTSUP;
		goto out;
	}
	case F_CHECK_LV: {
		/*
		 * F_CHECK_LV: library-validation check on this open file.
		 * Copies in an fchecklv_t (with a 32-bit compat layout) and,
		 * when CONFIG_MACF is built, defers the actual decision to
		 * mac_file_check_library_validation().  Without MACF the
		 * command succeeds as a no-op.
		 */
		struct fileglob *fg;
		fchecklv_t lv = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		fg = fp->fp_glob;
		proc_fdunlock(p);

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &lv, sizeof(lv));
		} else {
			struct user32_fchecklv lv32 = {};

			error = copyin(argp, &lv32, sizeof(lv32));
			lv.lv_file_start = lv32.lv_file_start;
			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
			lv.lv_error_message_size = lv32.lv_error_message_size;
		}
		if (error) {
			goto outdrop;
		}

#if CONFIG_MACF
		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
#endif

		break;
	}
	case F_GETSIGSINFO: {
		/*
		 * F_GETSIGSINFO: query properties of the code-signature blob
		 * registered at fg_file_start.  Currently the only request is
		 * GETSIGSINFO_PLATFORM_BINARY, which copies the blob's
		 * platform-binary flag into the caller's struct.
		 */
		struct cs_blob *blob = NULL;
		fgetsigsinfo_t sigsinfo = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
		if (blob == NULL) {
			/* no signature registered at that offset */
			error = ENOENT;
			vnode_put(vp);
			goto outdrop;
		}
		switch (sigsinfo.fg_info_request) {
		case GETSIGSINFO_PLATFORM_BINARY:
			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
			/* copy out only the answer field, not the whole struct */
			error = copyout(&sigsinfo.fg_sig_is_platform,
			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
			    sizeof(sigsinfo.fg_sig_is_platform));
			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
			break;
		default:
			error = EINVAL;
			vnode_put(vp);
			goto outdrop;
		}
		vnode_put(vp);
		break;
	}
#if CONFIG_PROTECT
	case F_GETPROTECTIONCLASS: {
		/*
		 * F_GETPROTECTIONCLASS: return the file's data-protection
		 * class via *retval, read through VNOP_GETATTR
		 * (va_dataprotect_class).  ENOTSUP if the filesystem does
		 * not report that attribute.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_dataprotect_class);
		error = VNOP_GETATTR(vp, &va, &context);
		if (!error) {
			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
				*retval = va.va_dataprotect_class;
			} else {
				error = ENOTSUP;
			}
		}

		vnode_put(vp);
		break;
	}

	case F_SETPROTECTIONCLASS: {
		/*
		 * F_SETPROTECTIONCLASS: set the file's data-protection class
		 * to the value in uap->arg via VNOP_SETATTR.  Requires write
		 * access to the vnode (checked with vnode_authorize).
		 */
		/* tmp must be a valid PROTECTION_CLASS_* */
		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_SET(&va, va_dataprotect_class, tmp);

		error = VNOP_SETATTR(vp, &va, ctx);

		vnode_put(vp);
		break;
	}

	case F_TRANSCODEKEY: {
		/*
		 * F_TRANSCODEKEY: ask the filesystem to transcode the file's
		 * wrapped content-protection key into a caller-visible form.
		 * A CP_MAX_WRAPPEDKEYSIZE scratch buffer is allocated, the
		 * filesystem fills it via VNOP_IOCTL, and the result (k.len
		 * bytes) is copied out with the length in *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		cp_key_t k = {
			.len = CP_MAX_WRAPPEDKEYSIZE,
		};

		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
		if (k.key == NULL) {
			error = ENOMEM;
		} else {
			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
		}

		vnode_put(vp);

		if (error == 0) {
			error = copyout(k.key, argp, k.len);
			*retval = k.len;
		}
		/* free(NULL-check done by kfree_data); full buffer size, not k.len */
		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);

		break;
	}

	case F_GETPROTECTIONLEVEL:  {
		/*
		 * F_GETPROTECTIONLEVEL: pass-through VNOP_IOCTL; the
		 * filesystem writes the protection level into *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}

	case F_GETDEFAULTPROTLEVEL:  {
		/*
		 * F_GETDEFAULTPROTLEVEL: pass-through VNOP_IOCTL returning
		 * the mount's default protection level via *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * if cp_get_major_vers fails, error will be set to proper errno
		 * and cp_version will still be 0.
		 */

		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}

#endif /* CONFIG_PROTECT */
4388 
4389 	case F_MOVEDATAEXTENTS: {
4390 		struct fileproc *fp2 = NULL;
4391 		struct vnode *src_vp = NULLVP;
4392 		struct vnode *dst_vp = NULLVP;
		/* We need to grab the 2nd FD out of the arguments before moving on. */
4394 		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
4395 
4396 		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
4397 		if (error) {
4398 			goto out;
4399 		}
4400 
4401 		if (fp->f_type != DTYPE_VNODE) {
4402 			error = EBADF;
4403 			goto out;
4404 		}
4405 
4406 		/*
4407 		 * For now, special case HFS+ and APFS only, since this
4408 		 * is SPI.
4409 		 */
4410 		src_vp = (struct vnode *)fp_get_data(fp);
4411 		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
4412 			error = ENOTSUP;
4413 			goto out;
4414 		}
4415 
4416 		/*
4417 		 * Get the references before we start acquiring iocounts on the vnodes,
4418 		 * while we still hold the proc fd lock
4419 		 */
4420 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4421 			error = EBADF;
4422 			goto out;
4423 		}
4424 		if (fp2->f_type != DTYPE_VNODE) {
4425 			fp_drop(p, fd2, fp2, 1);
4426 			error = EBADF;
4427 			goto out;
4428 		}
4429 		dst_vp = (struct vnode *)fp_get_data(fp2);
4430 		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
4431 			fp_drop(p, fd2, fp2, 1);
4432 			error = ENOTSUP;
4433 			goto out;
4434 		}
4435 
4436 #if CONFIG_MACF
4437 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4438 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4439 		if (error) {
4440 			fp_drop(p, fd2, fp2, 1);
4441 			goto out;
4442 		}
4443 #endif
4444 		/* Audit the 2nd FD */
4445 		AUDIT_ARG(fd, fd2);
4446 
4447 		proc_fdunlock(p);
4448 
4449 		if (vnode_getwithref(src_vp)) {
4450 			fp_drop(p, fd2, fp2, 0);
4451 			error = ENOENT;
4452 			goto outdrop;
4453 		}
4454 		if (vnode_getwithref(dst_vp)) {
4455 			vnode_put(src_vp);
4456 			fp_drop(p, fd2, fp2, 0);
4457 			error = ENOENT;
4458 			goto outdrop;
4459 		}
4460 
4461 		/*
4462 		 * Basic asserts; validate they are not the same and that
4463 		 * both live on the same filesystem.
4464 		 */
4465 		if (dst_vp == src_vp) {
4466 			vnode_put(src_vp);
4467 			vnode_put(dst_vp);
4468 			fp_drop(p, fd2, fp2, 0);
4469 			error = EINVAL;
4470 			goto outdrop;
4471 		}
4472 
4473 		if (dst_vp->v_mount != src_vp->v_mount) {
4474 			vnode_put(src_vp);
4475 			vnode_put(dst_vp);
4476 			fp_drop(p, fd2, fp2, 0);
4477 			error = EXDEV;
4478 			goto outdrop;
4479 		}
4480 
4481 		/* Now we have a legit pair of FDs.  Go to work */
4482 
4483 		/* Now check for write access to the target files */
4484 		if (vnode_authorize(src_vp, NULLVP,
4485 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4486 			vnode_put(src_vp);
4487 			vnode_put(dst_vp);
4488 			fp_drop(p, fd2, fp2, 0);
4489 			error = EBADF;
4490 			goto outdrop;
4491 		}
4492 
4493 		if (vnode_authorize(dst_vp, NULLVP,
4494 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4495 			vnode_put(src_vp);
4496 			vnode_put(dst_vp);
4497 			fp_drop(p, fd2, fp2, 0);
4498 			error = EBADF;
4499 			goto outdrop;
4500 		}
4501 
4502 		/* Verify that both vps point to files and not directories */
4503 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4504 			error = EINVAL;
4505 			vnode_put(src_vp);
4506 			vnode_put(dst_vp);
4507 			fp_drop(p, fd2, fp2, 0);
4508 			goto outdrop;
4509 		}
4510 
4511 		/*
4512 		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
4513 		 * We'll pass in our special bit indicating that the new behavior is expected
4514 		 */
4515 
4516 		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
4517 
4518 		vnode_put(src_vp);
4519 		vnode_put(dst_vp);
4520 		fp_drop(p, fd2, fp2, 0);
4521 		break;
4522 	}
4523 
4524 	/*
4525 	 * SPI for making a file compressed.
4526 	 */
4527 	case F_MAKECOMPRESSED: {
4528 		uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4529 
4530 		if (fp->f_type != DTYPE_VNODE) {
4531 			error = EBADF;
4532 			goto out;
4533 		}
4534 
4535 		vp = (struct vnode*)fp_get_data(fp);
4536 		proc_fdunlock(p);
4537 
4538 		/* get the vnode */
4539 		if (vnode_getwithref(vp)) {
4540 			error = ENOENT;
4541 			goto outdrop;
4542 		}
4543 
4544 		/* Is it a file? */
4545 		if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
4546 			vnode_put(vp);
4547 			error = EBADF;
4548 			goto outdrop;
4549 		}
4550 
4551 		/* invoke ioctl to pass off to FS */
4552 		/* Only go forward if you have write access */
4553 		vfs_context_t ctx = vfs_context_current();
4554 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4555 			vnode_put(vp);
4556 			error = EBADF;
4557 			goto outdrop;
4558 		}
4559 
4560 		error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
4561 
4562 		vnode_put(vp);
4563 		break;
4564 	}
4565 
4566 	/*
4567 	 * SPI (private) for indicating to a filesystem that subsequent writes to
	 * the open FD will be written to the Fastflow.
4569 	 */
4570 	case F_SET_GREEDY_MODE:
4571 	/* intentionally drop through to the same handler as F_SETSTATIC.
4572 	 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4573 	 */
4574 
4575 	/*
4576 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4577 	 * the open FD will represent static content.
4578 	 */
4579 	case F_SETSTATICCONTENT: {
4580 		caddr_t ioctl_arg = NULL;
4581 
4582 		if (uap->arg) {
4583 			ioctl_arg = (caddr_t) 1;
4584 		}
4585 
4586 		if (fp->f_type != DTYPE_VNODE) {
4587 			error = EBADF;
4588 			goto out;
4589 		}
4590 		vp = (struct vnode *)fp_get_data(fp);
4591 		proc_fdunlock(p);
4592 
4593 		error = vnode_getwithref(vp);
4594 		if (error) {
4595 			error = ENOENT;
4596 			goto outdrop;
4597 		}
4598 
4599 		/* Only go forward if you have write access */
4600 		vfs_context_t ctx = vfs_context_current();
4601 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4602 			vnode_put(vp);
4603 			error = EBADF;
4604 			goto outdrop;
4605 		}
4606 
4607 		error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
4608 		(void)vnode_put(vp);
4609 
4610 		break;
4611 	}
4612 
4613 	/*
4614 	 * SPI (private) for indicating to the lower level storage driver that the
4615 	 * subsequent writes should be of a particular IO type (burst, greedy, static),
4616 	 * or other flavors that may be necessary.
4617 	 */
4618 	case F_SETIOTYPE: {
4619 		caddr_t param_ptr;
4620 		uint32_t param;
4621 
4622 		if (uap->arg) {
4623 			/* extract 32 bits of flags from userland */
4624 			param_ptr = (caddr_t) uap->arg;
4625 			param = (uint32_t) param_ptr;
4626 		} else {
4627 			/* If no argument is specified, error out */
4628 			error = EINVAL;
4629 			goto out;
4630 		}
4631 
4632 		/*
4633 		 * Validate the different types of flags that can be specified:
4634 		 * all of them are mutually exclusive for now.
4635 		 */
4636 		switch (param) {
4637 		case F_IOTYPE_ISOCHRONOUS:
4638 			break;
4639 
4640 		default:
4641 			error = EINVAL;
4642 			goto out;
4643 		}
4644 
4645 
4646 		if (fp->f_type != DTYPE_VNODE) {
4647 			error = EBADF;
4648 			goto out;
4649 		}
4650 		vp = (struct vnode *)fp_get_data(fp);
4651 		proc_fdunlock(p);
4652 
4653 		error = vnode_getwithref(vp);
4654 		if (error) {
4655 			error = ENOENT;
4656 			goto outdrop;
4657 		}
4658 
4659 		/* Only go forward if you have write access */
4660 		vfs_context_t ctx = vfs_context_current();
4661 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4662 			vnode_put(vp);
4663 			error = EBADF;
4664 			goto outdrop;
4665 		}
4666 
4667 		error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
4668 		(void)vnode_put(vp);
4669 
4670 		break;
4671 	}
4672 
4673 	/*
4674 	 * Set the vnode pointed to by 'fd'
4675 	 * and tag it as the (potentially future) backing store
4676 	 * for another filesystem
4677 	 */
4678 	case F_SETBACKINGSTORE: {
4679 		if (fp->f_type != DTYPE_VNODE) {
4680 			error = EBADF;
4681 			goto out;
4682 		}
4683 
4684 		vp = (struct vnode *)fp_get_data(fp);
4685 
4686 		if (vp->v_tag != VT_HFS) {
4687 			error = EINVAL;
4688 			goto out;
4689 		}
4690 		proc_fdunlock(p);
4691 
4692 		if (vnode_getwithref(vp)) {
4693 			error = ENOENT;
4694 			goto outdrop;
4695 		}
4696 
4697 		/* only proceed if you have write access */
4698 		vfs_context_t ctx = vfs_context_current();
4699 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4700 			vnode_put(vp);
4701 			error = EBADF;
4702 			goto outdrop;
4703 		}
4704 
4705 
4706 		/* If arg != 0, set, otherwise unset */
4707 		if (uap->arg) {
4708 			error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
4709 		} else {
4710 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
4711 		}
4712 
4713 		vnode_put(vp);
4714 		break;
4715 	}
4716 
4717 	/*
4718 	 * like F_GETPATH, but special semantics for
4719 	 * the mobile time machine handler.
4720 	 */
4721 	case F_GETPATH_MTMINFO: {
4722 		char *pathbufp;
4723 		int pathlen;
4724 
4725 		if (fp->f_type != DTYPE_VNODE) {
4726 			error = EBADF;
4727 			goto out;
4728 		}
4729 		vp = (struct vnode *)fp_get_data(fp);
4730 		proc_fdunlock(p);
4731 
4732 		pathlen = MAXPATHLEN;
4733 		pathbufp = zalloc(ZV_NAMEI);
4734 
4735 		if ((error = vnode_getwithref(vp)) == 0) {
4736 			int backingstore = 0;
4737 
4738 			/* Check for error from vn_getpath before moving on */
4739 			if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4740 				if (vp->v_tag == VT_HFS) {
4741 					error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4742 				}
4743 				(void)vnode_put(vp);
4744 
4745 				if (error == 0) {
4746 					error = copyout((caddr_t)pathbufp, argp, pathlen);
4747 				}
4748 				if (error == 0) {
4749 					/*
4750 					 * If the copyout was successful, now check to ensure
4751 					 * that this vnode is not a BACKINGSTORE vnode.  mtmd
4752 					 * wants the path regardless.
4753 					 */
4754 					if (backingstore) {
4755 						error = EBUSY;
4756 					}
4757 				}
4758 			} else {
4759 				(void)vnode_put(vp);
4760 			}
4761 		}
4762 
4763 		zfree(ZV_NAMEI, pathbufp);
4764 		goto outdrop;
4765 	}
4766 
4767 	case F_RECYCLE: {
4768 #if !DEBUG && !DEVELOPMENT
4769 		bool allowed = false;
4770 
4771 		//
4772 		// non-debug and non-development kernels have restrictions
4773 		// on who can all this fcntl.  the process has to be marked
4774 		// with the dataless-manipulator entitlement and either the
4775 		// process or thread have to be marked rapid-aging.
4776 		//
4777 		if (!vfs_context_is_dataless_manipulator(&context)) {
4778 			error = EPERM;
4779 			goto out;
4780 		}
4781 
4782 		proc_t proc = vfs_context_proc(&context);
4783 		if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4784 			allowed = true;
4785 		} else {
4786 			thread_t thr = vfs_context_thread(&context);
4787 			if (thr) {
4788 				struct uthread *ut = get_bsdthread_info(thr);
4789 
4790 				if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4791 					allowed = true;
4792 				}
4793 			}
4794 		}
4795 		if (!allowed) {
4796 			error = EPERM;
4797 			goto out;
4798 		}
4799 #endif
4800 
4801 		if (fp->f_type != DTYPE_VNODE) {
4802 			error = EBADF;
4803 			goto out;
4804 		}
4805 		vp = (struct vnode *)fp_get_data(fp);
4806 		proc_fdunlock(p);
4807 
4808 		vnode_recycle(vp);
4809 		break;
4810 	}
4811 
4812 	default:
4813 		/*
		 * This is an fcntl() that we do not recognize at this level;
4815 		 * if this is a vnode, we send it down into the VNOP_IOCTL
4816 		 * for this vnode; this can include special devices, and will
4817 		 * effectively overload fcntl() to send ioctl()'s.
4818 		 */
4819 		if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
4820 			error = EINVAL;
4821 			goto out;
4822 		}
4823 
4824 		/* Catch any now-invalid fcntl() selectors */
4825 		switch (cmd) {
4826 		case (int)APFSIOC_REVERT_TO_SNAPSHOT:
4827 		case (int)FSIOC_FIOSEEKHOLE:
4828 		case (int)FSIOC_FIOSEEKDATA:
4829 		case (int)FSIOC_CAS_BSDFLAGS:
4830 		case HFS_GET_BOOT_INFO:
4831 		case HFS_SET_BOOT_INFO:
4832 		case FIOPINSWAP:
4833 		case F_MARKDEPENDENCY:
4834 		case TIOCREVOKE:
4835 		case TIOCREVOKECLEAR:
4836 			error = EINVAL;
4837 			goto out;
4838 		default:
4839 			break;
4840 		}
4841 
4842 		if (fp->f_type != DTYPE_VNODE) {
4843 			error = EBADF;
4844 			goto out;
4845 		}
4846 		vp = (struct vnode *)fp_get_data(fp);
4847 		proc_fdunlock(p);
4848 
4849 		if ((error = vnode_getwithref(vp)) == 0) {
4850 #define STK_PARAMS 128
4851 			char stkbuf[STK_PARAMS] = {0};
4852 			unsigned int size;
4853 			caddr_t data, memp;
4854 			/*
4855 			 * For this to work properly, we have to copy in the
4856 			 * ioctl() cmd argument if there is one; we must also
4857 			 * check that a command parameter, if present, does
4858 			 * not exceed the maximum command length dictated by
4859 			 * the number of bits we have available in the command
4860 			 * to represent a structure length.  Finally, we have
4861 			 * to copy the results back out, if it is that type of
4862 			 * ioctl().
4863 			 */
4864 			size = IOCPARM_LEN(cmd);
4865 			if (size > IOCPARM_MAX) {
4866 				(void)vnode_put(vp);
4867 				error = EINVAL;
4868 				break;
4869 			}
4870 
4871 			memp = NULL;
4872 			if (size > sizeof(stkbuf)) {
4873 				memp = (caddr_t)kalloc_data(size, Z_WAITOK);
4874 				if (memp == 0) {
4875 					(void)vnode_put(vp);
4876 					error = ENOMEM;
4877 					goto outdrop;
4878 				}
4879 				data = memp;
4880 			} else {
4881 				data = &stkbuf[0];
4882 			}
4883 
4884 			if (cmd & IOC_IN) {
4885 				if (size) {
4886 					/* structure */
4887 					error = copyin(argp, data, size);
4888 					if (error) {
4889 						(void)vnode_put(vp);
4890 						if (memp) {
4891 							kfree_data(memp, size);
4892 						}
4893 						goto outdrop;
4894 					}
4895 
4896 					/* Bzero the section beyond that which was needed */
4897 					if (size <= sizeof(stkbuf)) {
4898 						bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
4899 					}
4900 				} else {
4901 					/* int */
4902 					if (is64bit) {
4903 						*(user_addr_t *)data = argp;
4904 					} else {
4905 						*(uint32_t *)data = (uint32_t)argp;
4906 					}
4907 				};
4908 			} else if ((cmd & IOC_OUT) && size) {
4909 				/*
4910 				 * Zero the buffer so the user always
4911 				 * gets back something deterministic.
4912 				 */
4913 				bzero(data, size);
4914 			} else if (cmd & IOC_VOID) {
4915 				if (is64bit) {
4916 					*(user_addr_t *)data = argp;
4917 				} else {
4918 					*(uint32_t *)data = (uint32_t)argp;
4919 				}
4920 			}
4921 
4922 			error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
4923 
4924 			(void)vnode_put(vp);
4925 
4926 			/* Copy any output data to user */
4927 			if (error == 0 && (cmd & IOC_OUT) && size) {
4928 				error = copyout(data, argp, size);
4929 			}
4930 			if (memp) {
4931 				kfree_data(memp, size);
4932 			}
4933 		}
4934 		break;
4935 	}
4936 
4937 outdrop:
4938 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
4939 
4940 out:
4941 	return sys_fcntl_out(p, fd, fp, error);
4942 }
4943 
4944 
4945 /*
4946  * sys_close
4947  *
4948  * Description:	The implementation of the close(2) system call
4949  *
4950  * Parameters:	p			Process in whose per process file table
4951  *					the close is to occur
4952  *		uap->fd			fd to be closed
4953  *		retval			<unused>
4954  *
4955  * Returns:	0			Success
4956  *	fp_lookup:EBADF			Bad file descriptor
4957  *      fp_guard_exception:???          Guarded file descriptor
4958  *	close_internal:EBADF
4959  *	close_internal:???              Anything returnable by a per-fileops
4960  *					close function
4961  */
int
sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
{
	/* Cancelable variant: establish a pthread cancelation point first. */
	__pthread_testcancel(1);
	return close_nocancel(p, uap->fd);
}
4968 
int
sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
{
	/* Non-cancelable variant: same close path, no cancelation point. */
	return close_nocancel(p, uap->fd);
}
4974 
4975 int
close_nocancel(proc_t p,int fd)4976 close_nocancel(proc_t p, int fd)
4977 {
4978 	struct fileproc *fp;
4979 
4980 	AUDIT_SYSCLOSE(p, fd);
4981 
4982 	proc_fdlock(p);
4983 	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
4984 		proc_fdunlock(p);
4985 		return EBADF;
4986 	}
4987 
4988 	if (fp_isguarded(fp, GUARD_CLOSE)) {
4989 		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
4990 		proc_fdunlock(p);
4991 		return error;
4992 	}
4993 
4994 	return fp_close_and_unlock(p, fd, fp, 0);
4995 }
4996 
4997 
4998 /*
4999  * fstat
5000  *
5001  * Description:	Return status information about a file descriptor.
5002  *
5003  * Parameters:	p				The process doing the fstat
5004  *		fd				The fd to stat
5005  *		ub				The user stat buffer
5006  *		xsecurity			The user extended security
5007  *						buffer, or 0 if none
5008  *		xsecurity_size			The size of xsecurity, or 0
5009  *						if no xsecurity
5010  *		isstat64			Flag to indicate 64 bit version
5011  *						for inode size, etc.
5012  *
5013  * Returns:	0				Success
5014  *		EBADF
5015  *		EFAULT
5016  *	fp_lookup:EBADF				Bad file descriptor
5017  *	vnode_getwithref:???
5018  *	copyout:EFAULT
5019  *	vnode_getwithref:???
5020  *	vn_stat:???
5021  *	soo_stat:???
5022  *	pipe_stat:???
5023  *	pshm_stat:???
5024  *	kqueue_stat:???
5025  *
5026  * Notes:	Internal implementation for all other fstat() related
5027  *		functions
5028  *
5029  *		XXX switch on node type is bogus; need a stat in struct
5030  *		XXX fileops instead.
5031  */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
    user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* kernel-side stat result; 32-bit or 64-bit inode layout per isstat64 */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* user-ABI copy of the result, selected by process bitness and isstat64 */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = (caddr_t)fp_get_data(fp);
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/*
	 * Dispatch on the descriptor's file type (see the XXX note in the
	 * block comment above: a stat op in struct fileops would be cleaner).
	 */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
				    fp->fp_glob->fg_cred);
			} else {
				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put((vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		/*
		 * Zero the spare fields so no kernel bytes leak to userland,
		 * then munge into the layout the calling process expects.
		 */
		if (isstat64 != 0) {
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat(&source.sb, &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(&source.sb, &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/* if the caller supplied enough room, copy out to it */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	fp_drop(p, fd, fp, 0);
	/* vn_stat() may have allocated a filesec on our behalf; release it */
	if (fsec != NULL) {
		kauth_filesec_free(fsec);
	}
	return error;
}
5175 
5176 
5177 /*
5178  * sys_fstat_extended
5179  *
5180  * Description:	Extended version of fstat supporting returning extended
5181  *		security information
5182  *
5183  * Parameters:	p				The process doing the fstat
5184  *		uap->fd				The fd to stat
5185  *		uap->ub				The user stat buffer
5186  *		uap->xsecurity			The user extended security
5187  *						buffer, or 0 if none
5188  *		uap->xsecurity_size		The size of xsecurity, or 0
5189  *
5190  * Returns:	0				Success
5191  *		!0				Errno (see fstat)
5192  */
int
sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
{
	/* isstat64 == 0: legacy (32-bit inode) stat layout, with xsecurity */
	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
}
5198 
5199 
5200 /*
5201  * sys_fstat
5202  *
5203  * Description:	Get file status for the file associated with fd
5204  *
5205  * Parameters:	p				The process doing the fstat
5206  *		uap->fd				The fd to stat
5207  *		uap->ub				The user stat buffer
5208  *
5209  * Returns:	0				Success
5210  *		!0				Errno (see fstat)
5211  */
int
sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
{
	/* no extended security info; legacy (32-bit inode) stat layout */
	return fstat(p, uap->fd, uap->ub, 0, 0, 0);
}
5217 
5218 
5219 /*
5220  * sys_fstat64_extended
5221  *
5222  * Description:	Extended version of fstat64 supporting returning extended
5223  *		security information
5224  *
5225  * Parameters:	p				The process doing the fstat
5226  *		uap->fd				The fd to stat
5227  *		uap->ub				The user stat buffer
5228  *		uap->xsecurity			The user extended security
5229  *						buffer, or 0 if none
5230  *		uap->xsecurity_size		The size of xsecurity, or 0
5231  *
5232  * Returns:	0				Success
5233  *		!0				Errno (see fstat)
5234  */
int
sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
{
	/* isstat64 == 1: 64-bit inode stat layout, with xsecurity */
	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
}
5240 
5241 
5242 /*
5243  * sys_fstat64
5244  *
5245  * Description:	Get 64 bit version of the file status for the file associated
5246  *		with fd
5247  *
5248  * Parameters:	p				The process doing the fstat
5249  *		uap->fd				The fd to stat
5250  *		uap->ub				The user stat buffer
5251  *
5252  * Returns:	0				Success
5253  *		!0				Errno (see fstat)
5254  */
int
sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
{
	/* no extended security info; 64-bit inode stat layout */
	return fstat(p, uap->fd, uap->ub, 0, 0, 1);
}
5260 
5261 
5262 /*
5263  * sys_fpathconf
5264  *
5265  * Description:	Return pathconf information about a file descriptor.
5266  *
5267  * Parameters:	p				Process making the request
5268  *		uap->fd				fd to get information about
5269  *		uap->name			Name of information desired
5270  *		retval				Pointer to the call return area
5271  *
5272  * Returns:	0				Success
5273  *		EINVAL
5274  *	fp_lookup:EBADF				Bad file descriptor
5275  *	vnode_getwithref:???
5276  *	vn_pathconf:???
5277  *
5278  * Implicit returns:
5279  *		*retval (modified)		Returned information (numeric)
5280  */
5281 int
sys_fpathconf(proc_t p,struct fpathconf_args * uap,int32_t * retval)5282 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5283 {
5284 	int fd = uap->fd;
5285 	struct fileproc *fp;
5286 	struct vnode *vp;
5287 	int error = 0;
5288 	file_type_t type;
5289 
5290 
5291 	AUDIT_ARG(fd, uap->fd);
5292 	if ((error = fp_lookup(p, fd, &fp, 0))) {
5293 		return error;
5294 	}
5295 	type = fp->f_type;
5296 
5297 	switch (type) {
5298 	case DTYPE_SOCKET:
5299 		if (uap->name != _PC_PIPE_BUF) {
5300 			error = EINVAL;
5301 			goto out;
5302 		}
5303 		*retval = PIPE_BUF;
5304 		error = 0;
5305 		goto out;
5306 
5307 	case DTYPE_PIPE:
5308 		if (uap->name != _PC_PIPE_BUF) {
5309 			error = EINVAL;
5310 			goto out;
5311 		}
5312 		*retval = PIPE_BUF;
5313 		error = 0;
5314 		goto out;
5315 
5316 	case DTYPE_VNODE:
5317 		vp = (struct vnode *)fp_get_data(fp);
5318 
5319 		if ((error = vnode_getwithref(vp)) == 0) {
5320 			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5321 
5322 			error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5323 
5324 			(void)vnode_put(vp);
5325 		}
5326 		goto out;
5327 
5328 	default:
5329 		error = EINVAL;
5330 		goto out;
5331 	}
5332 	/*NOTREACHED*/
5333 out:
5334 	fp_drop(p, fd, fp, 0);
5335 	return error;
5336 }
5337 
5338 /*
5339  * sys_flock
5340  *
5341  * Description:	Apply an advisory lock on a file descriptor.
5342  *
5343  * Parameters:	p				Process making request
5344  *		uap->fd				fd on which the lock is to be
5345  *						attempted
5346  *		uap->how			(Un)Lock bits, including type
5347  *		retval				Pointer to the call return area
5348  *
5349  * Returns:	0				Success
5350  *	fp_getfvp:EBADF				Bad file descriptor
5351  *	fp_getfvp:ENOTSUP			fd does not refer to a vnode
5352  *	vnode_getwithref:???
5353  *	VNOP_ADVLOCK:???
5354  *
5355  * Implicit returns:
5356  *		*retval (modified)		Size of dtable
5357  *
5358  * Notes:	Just attempt to get a record lock of the requested type on
5359  *		the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5360  */
5361 int
sys_flock(proc_t p,struct flock_args * uap,__unused int32_t * retval)5362 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5363 {
5364 	int fd = uap->fd;
5365 	int how = uap->how;
5366 	struct fileproc *fp;
5367 	struct vnode *vp;
5368 	struct flock lf;
5369 	vfs_context_t ctx = vfs_context_current();
5370 	int error = 0;
5371 
5372 	AUDIT_ARG(fd, uap->fd);
5373 	if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5374 		return error;
5375 	}
5376 	if ((error = vnode_getwithref(vp))) {
5377 		goto out1;
5378 	}
5379 	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5380 
5381 	lf.l_whence = SEEK_SET;
5382 	lf.l_start = 0;
5383 	lf.l_len = 0;
5384 	if (how & LOCK_UN) {
5385 		lf.l_type = F_UNLCK;
5386 		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5387 		goto out;
5388 	}
5389 	if (how & LOCK_EX) {
5390 		lf.l_type = F_WRLCK;
5391 	} else if (how & LOCK_SH) {
5392 		lf.l_type = F_RDLCK;
5393 	} else {
5394 		error = EBADF;
5395 		goto out;
5396 	}
5397 #if CONFIG_MACF
5398 	error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
5399 	if (error) {
5400 		goto out;
5401 	}
5402 #endif
5403 	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5404 	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5405 	    ctx, NULL);
5406 	if (!error) {
5407 		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5408 	}
5409 out:
5410 	(void)vnode_put(vp);
5411 out1:
5412 	fp_drop(p, fd, fp, 0);
5413 	return error;
5414 }
5415 
5416 /*
5417  * sys_fileport_makeport
5418  *
5419  * Description: Obtain a Mach send right for a given file descriptor.
5420  *
5421  * Parameters:	p		Process calling fileport
5422  *              uap->fd		The fd to reference
5423  *              uap->portnamep  User address at which to place port name.
5424  *
5425  * Returns:	0		Success.
5426  *              EBADF		Bad file descriptor.
5427  *              EINVAL		File descriptor had type that cannot be sent, misc. other errors.
5428  *              EFAULT		Address at which to store port name is not valid.
5429  *              EAGAIN		Resource shortage.
5430  *
5431  * Implicit returns:
5432  *		On success, name of send right is stored at user-specified address.
5433  */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	proc_fdlock(p);
	err = fp_lookup(p, fd, &fp, 1);
	if (err != 0) {
		goto out_unlock;
	}

	/* some file types cannot be transported over a fileport */
	fg = fp->fp_glob;
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	/* a fileport-guarded fd raises a guard exception instead */
	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		/* port creation failed: give back the fileglob reference */
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry.  Deallocates port on failure. */
	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		/* copyout failed: the error path below deallocates the name */
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(&fg->fg_lock);

	fp_drop(p, fd, fp, 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, 0);
	}

	return err;
}
5511 
/*
 * fileport_releasefg
 *
 * Description: Release the fileglob reference taken on behalf of a
 *		fileport (see the fg_ref in sys_fileport_makeport).
 *		NOTE(review): presumably invoked when the port is
 *		deallocated — the caller is not visible in this file.
 */
void
fileport_releasefg(struct fileglob *fg)
{
	/* No owning process: drop the reference against PROC_NULL */
	(void)fg_drop(PROC_NULL, fg);
}
5517 
5518 /*
5519  * fileport_makefd
5520  *
5521  * Description: Obtain the file descriptor for a given Mach send right.
5522  *
5523  * Returns:	0		Success
5524  *		EINVAL		Invalid Mach port name, or port is not for a file.
5525  *	fdalloc:EMFILE
5526  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5527  *
5528  * Implicit returns:
5529  *		*retval (modified)		The new descriptor
5530  */
5531 int
fileport_makefd(proc_t p,ipc_port_t port,fileproc_flags_t fp_flags,int * retval)5532 fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
5533 {
5534 	struct fileglob *fg;
5535 	struct fileproc *fp = FILEPROC_NULL;
5536 	int fd;
5537 	int err;
5538 
5539 	fg = fileport_port_to_fileglob(port);
5540 	if (fg == NULL) {
5541 		err = EINVAL;
5542 		goto out;
5543 	}
5544 
5545 	fp = fileproc_alloc_init();
5546 
5547 	proc_fdlock(p);
5548 	err = fdalloc(p, 0, &fd);
5549 	if (err != 0) {
5550 		proc_fdunlock(p);
5551 		goto out;
5552 	}
5553 	if (fp_flags) {
5554 		fp->fp_flags |= fp_flags;
5555 	}
5556 
5557 	fp->fp_glob = fg;
5558 	fg_ref(p, fg);
5559 
5560 	procfdtbl_releasefd(p, fd, fp);
5561 	proc_fdunlock(p);
5562 
5563 	*retval = fd;
5564 	err = 0;
5565 out:
5566 	if ((fp != NULL) && (0 != err)) {
5567 		fileproc_free(fp);
5568 	}
5569 
5570 	return err;
5571 }
5572 
/*
 * sys_fileport_makefd
 *
 * Description: Obtain the file descriptor for a given Mach send right.
 *
 * Parameters:	p		Process calling fileport
 *              uap->port	Name of send right to file port.
 *
 * Returns:	0		Success
 *		EINVAL		Invalid Mach port name, or port is not for a file.
 *	fdalloc:EMFILE
 *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
 *
 * Implicit returns:
 *		*retval (modified)		The new descriptor
 */
int
sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
{
	ipc_port_t port = IPC_PORT_NULL;
	mach_port_name_t send = uap->port;
	kern_return_t res;
	int err;

	/* Convert the caller's port name into a kernel send-right reference */
	res = ipc_object_copyin(get_task_ipcspace(p->task),
	    send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);

	if (res == KERN_SUCCESS) {
		/* Descriptors created from fileports are close-on-exec */
		err = fileport_makefd(p, port, FP_CLOEXEC, retval);
	} else {
		err = EINVAL;
	}

	/* Drop the send right taken by ipc_object_copyin on every path */
	if (IPC_PORT_NULL != port) {
		ipc_port_release_send(port);
	}

	return err;
}
5612 
5613 
5614 #pragma mark fileops wrappers
5615 
5616 /*
5617  * fo_read
5618  *
5619  * Description:	Generic fileops read indirected through the fileops pointer
5620  *		in the fileproc structure
5621  *
5622  * Parameters:	fp				fileproc structure pointer
5623  *		uio				user I/O structure pointer
5624  *		flags				FOF_ flags
5625  *		ctx				VFS context for operation
5626  *
5627  * Returns:	0				Success
5628  *		!0				Errno from read
5629  */
5630 int
fo_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5631 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5632 {
5633 	return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5634 }
5635 
5636 int
fo_no_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5637 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5638 {
5639 #pragma unused(fp, uio, flags, ctx)
5640 	return ENXIO;
5641 }
5642 
5643 
5644 /*
5645  * fo_write
5646  *
5647  * Description:	Generic fileops write indirected through the fileops pointer
5648  *		in the fileproc structure
5649  *
5650  * Parameters:	fp				fileproc structure pointer
5651  *		uio				user I/O structure pointer
5652  *		flags				FOF_ flags
5653  *		ctx				VFS context for operation
5654  *
5655  * Returns:	0				Success
5656  *		!0				Errno from write
5657  */
5658 int
fo_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5659 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5660 {
5661 	return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5662 }
5663 
5664 int
fo_no_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5665 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5666 {
5667 #pragma unused(fp, uio, flags, ctx)
5668 	return ENXIO;
5669 }
5670 
5671 
5672 /*
5673  * fo_ioctl
5674  *
5675  * Description:	Generic fileops ioctl indirected through the fileops pointer
5676  *		in the fileproc structure
5677  *
5678  * Parameters:	fp				fileproc structure pointer
5679  *		com				ioctl command
5680  *		data				pointer to internalized copy
5681  *						of user space ioctl command
5682  *						parameter data in kernel space
5683  *		ctx				VFS context for operation
5684  *
5685  * Returns:	0				Success
5686  *		!0				Errno from ioctl
5687  *
5688  * Locks:	The caller is assumed to have held the proc_fdlock; this
5689  *		function releases and reacquires this lock.  If the caller
5690  *		accesses data protected by this lock prior to calling this
5691  *		function, it will need to revalidate/reacquire any cached
5692  *		protected data obtained prior to the call.
5693  */
5694 int
fo_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5695 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5696 {
5697 	int error;
5698 
5699 	proc_fdunlock(vfs_context_proc(ctx));
5700 	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5701 	proc_fdlock(vfs_context_proc(ctx));
5702 	return error;
5703 }
5704 
5705 int
fo_no_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5706 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5707 {
5708 #pragma unused(fp, com, data, ctx)
5709 	return ENOTTY;
5710 }
5711 
5712 
5713 /*
5714  * fo_select
5715  *
5716  * Description:	Generic fileops select indirected through the fileops pointer
5717  *		in the fileproc structure
5718  *
5719  * Parameters:	fp				fileproc structure pointer
5720  *		which				select which
5721  *		wql				pointer to wait queue list
5722  *		ctx				VFS context for operation
5723  *
5724  * Returns:	0				Success
5725  *		!0				Errno from select
5726  */
5727 int
fo_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)5728 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5729 {
5730 	return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5731 }
5732 
5733 int
fo_no_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)5734 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5735 {
5736 #pragma unused(fp, which, wql, ctx)
5737 	return ENOTSUP;
5738 }
5739 
5740 
5741 /*
5742  * fo_close
5743  *
5744  * Description:	Generic fileops close indirected through the fileops pointer
5745  *		in the fileproc structure
5746  *
5747  * Parameters:	fp				fileproc structure pointer for
5748  *						file to close
5749  *		ctx				VFS context for operation
5750  *
5751  * Returns:	0				Success
5752  *		!0				Errno from close
5753  */
5754 int
fo_close(struct fileglob * fg,vfs_context_t ctx)5755 fo_close(struct fileglob *fg, vfs_context_t ctx)
5756 {
5757 	return (*fg->fg_ops->fo_close)(fg, ctx);
5758 }
5759 
5760 
5761 /*
5762  * fo_drain
5763  *
5764  * Description:	Generic fileops kqueue filter indirected through the fileops
5765  *		pointer in the fileproc structure
5766  *
5767  * Parameters:	fp				fileproc structure pointer
5768  *		ctx				VFS context for operation
5769  *
5770  * Returns:	0				Success
5771  *		!0				errno from drain
5772  */
5773 int
fo_drain(struct fileproc * fp,vfs_context_t ctx)5774 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5775 {
5776 	return (*fp->f_ops->fo_drain)(fp, ctx);
5777 }
5778 
5779 int
fo_no_drain(struct fileproc * fp,vfs_context_t ctx)5780 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5781 {
5782 #pragma unused(fp, ctx)
5783 	return ENOTSUP;
5784 }
5785 
5786 
5787 /*
5788  * fo_kqfilter
5789  *
5790  * Description:	Generic fileops kqueue filter indirected through the fileops
5791  *		pointer in the fileproc structure
5792  *
5793  * Parameters:	fp				fileproc structure pointer
5794  *		kn				pointer to knote to filter on
5795  *
5796  * Returns:	(kn->kn_flags & EV_ERROR)	error in kn->kn_data
5797  *		0				Filter is not active
5798  *		!0				Filter is active
5799  */
5800 int
fo_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)5801 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5802 {
5803 	return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5804 }
5805 
5806 int
fo_no_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)5807 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5808 {
5809 #pragma unused(fp, kev)
5810 	knote_set_error(kn, ENOTSUP);
5811 	return 0;
5812 }
5813