xref: /xnu-10002.81.5/bsd/kern/kern_descrip.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
67  */
68 /*
69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70  * support for mandatory and extensible security protections.  This notice
71  * is included in support of clause 2.2 (b) of the Apple Public License,
72  * Version 2.0.
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110 
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116 
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119 
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124 
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129 
/*
 * Mach IPC routines used by this file, prototyped locally rather than
 * through a header.
 */
#define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
    mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
void ipc_port_release_send(ipc_port_t);

void fileport_releasefg(struct fileglob *fg);

/* flags for fp_close_and_unlock */
#define FD_DUP2RESV 1

/* We don't want these exported */

__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

/* Conflict wait queue for when selects collide (opaque type) */
extern struct waitq select_conflict_queue;

/*
 * Shorthand member names: each expands to a field reached through the
 * fp_glob pointer, so "fp->f_flag" reads "fp->fp_glob->fg_flag".
 */
#define f_flag fp_glob->fg_flag
#define f_type fp_glob->fg_ops->fo_type
#define f_cred fp_glob->fg_cred
#define f_ops fp_glob->fg_ops
#define f_offset fp_glob->fg_offset

/* Allocation zones backing struct fileglob and struct fileproc. */
ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);

/*
 * Descriptor management.
 */
int nfiles;                     /* actual number of open files */
/*
 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
 */
static const struct fileops uninitops;

/* Reference group shared by the fileglob (fg_count) and fileproc (fp_iocount) refcounts. */
os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
/* Lock group used for the per-fileglob fg_lock mutex. */
static LCK_GRP_DECLARE(file_lck_grp, "file");
168 
169 
170 #pragma mark fileglobs
171 
172 /*!
173  * @function fg_free
174  *
175  * @brief
176  * Free a file structure.
177  */
178 static void
fg_free(struct fileglob * fg)179 fg_free(struct fileglob *fg)
180 {
181 	os_atomic_dec(&nfiles, relaxed);
182 
183 	if (fg->fg_vn_data) {
184 		fg_vn_data_free(fg->fg_vn_data);
185 		fg->fg_vn_data = NULL;
186 	}
187 
188 	kauth_cred_t cred = fg->fg_cred;
189 	if (IS_VALID_CRED(cred)) {
190 		kauth_cred_unref(&cred);
191 		fg->fg_cred = NOCRED;
192 	}
193 	lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
194 
195 #if CONFIG_MACF && CONFIG_VNGUARD
196 	vng_file_label_destroy(fg);
197 #endif
198 	zfree(fg_zone, fg);
199 }
200 
201 OS_ALWAYS_INLINE
202 void
fg_ref(proc_t p,struct fileglob * fg)203 fg_ref(proc_t p, struct fileglob *fg)
204 {
205 #if DEBUG || DEVELOPMENT
206 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
207 #else
208 	(void)p;
209 #endif
210 	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
211 }
212 
213 void
fg_drop_live(struct fileglob * fg)214 fg_drop_live(struct fileglob *fg)
215 {
216 	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
217 }
218 
219 int
fg_drop(proc_t p,struct fileglob * fg)220 fg_drop(proc_t p, struct fileglob *fg)
221 {
222 	struct vnode *vp;
223 	struct vfs_context context;
224 	int error = 0;
225 
226 	if (fg == NULL) {
227 		return 0;
228 	}
229 
230 	/* Set up context with cred stashed in fg */
231 	if (p == current_proc()) {
232 		context.vc_thread = current_thread();
233 	} else {
234 		context.vc_thread = NULL;
235 	}
236 	context.vc_ucred = fg->fg_cred;
237 
238 	/*
239 	 * POSIX record locking dictates that any close releases ALL
240 	 * locks owned by this process.  This is handled by setting
241 	 * a flag in the unlock to free ONLY locks obeying POSIX
242 	 * semantics, and not to free BSD-style file locks.
243 	 * If the descriptor was in a message, POSIX-style locks
244 	 * aren't passed with the descriptor.
245 	 */
246 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
247 	    (p->p_ladvflag & P_LADVLOCK)) {
248 		struct flock lf = {
249 			.l_whence = SEEK_SET,
250 			.l_type = F_UNLCK,
251 		};
252 
253 		vp = (struct vnode *)fg_get_data(fg);
254 		if ((error = vnode_getwithref(vp)) == 0) {
255 			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
256 			(void)vnode_put(vp);
257 		}
258 	}
259 
260 	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
261 		/*
262 		 * Since we ensure that fg->fg_ops is always initialized,
263 		 * it is safe to invoke fo_close on the fg
264 		 */
265 		error = fo_close(fg, &context);
266 
267 		fg_free(fg);
268 	}
269 
270 	return error;
271 }
272 
273 inline
274 void
fg_set_data(struct fileglob * fg,void * fg_data)275 fg_set_data(
276 	struct fileglob *fg,
277 	void *fg_data)
278 {
279 	uintptr_t *store = &fg->fg_data;
280 
281 #if __has_feature(ptrauth_calls)
282 	int type = FILEGLOB_DTYPE(fg);
283 
284 	if (fg_data) {
285 		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
286 		fg_data = ptrauth_sign_unauthenticated(fg_data,
287 		    ptrauth_key_process_independent_data,
288 		    ptrauth_blend_discriminator(store, type));
289 	}
290 #endif // __has_feature(ptrauth_calls)
291 
292 	*store = (uintptr_t)fg_data;
293 }
294 
295 inline
296 void *
fg_get_data_volatile(struct fileglob * fg)297 fg_get_data_volatile(struct fileglob *fg)
298 {
299 	uintptr_t *store = &fg->fg_data;
300 	void *fg_data = (void *)*store;
301 
302 #if __has_feature(ptrauth_calls)
303 	int type = FILEGLOB_DTYPE(fg);
304 
305 	if (fg_data) {
306 		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
307 		fg_data = ptrauth_auth_data(fg_data,
308 		    ptrauth_key_process_independent_data,
309 		    ptrauth_blend_discriminator(store, type));
310 	}
311 #endif // __has_feature(ptrauth_calls)
312 
313 	return fg_data;
314 }
315 
316 static void
fg_transfer_filelocks(proc_t p,struct fileglob * fg,thread_t thread)317 fg_transfer_filelocks(proc_t p, struct fileglob *fg, thread_t thread)
318 {
319 	struct vnode *vp;
320 	struct vfs_context context;
321 	struct proc *old_proc = current_proc();
322 
323 	assert(fg != NULL);
324 
325 	assert(p != old_proc);
326 	context.vc_thread = thread;
327 	context.vc_ucred = fg->fg_cred;
328 
329 	/* Transfer all POSIX Style locks to new proc */
330 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
331 	    (p->p_ladvflag & P_LADVLOCK)) {
332 		struct flock lf = {
333 			.l_whence = SEEK_SET,
334 			.l_start = 0,
335 			.l_len = 0,
336 			.l_type = F_TRANSFER,
337 		};
338 
339 		vp = (struct vnode *)fg_get_data(fg);
340 		if (vnode_getwithref(vp) == 0) {
341 			(void)VNOP_ADVLOCK(vp, (caddr_t)old_proc, F_TRANSFER, &lf, F_POSIX, &context, NULL);
342 			(void)vnode_put(vp);
343 		}
344 	}
345 
346 	/* Transfer all OFD Style locks to new proc */
347 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
348 	    (fg->fg_lflags & FG_HAS_OFDLOCK)) {
349 		struct flock lf = {
350 			.l_whence = SEEK_SET,
351 			.l_start = 0,
352 			.l_len = 0,
353 			.l_type = F_TRANSFER,
354 		};
355 
356 		vp = (struct vnode *)fg_get_data(fg);
357 		if (vnode_getwithref(vp) == 0) {
358 			(void)VNOP_ADVLOCK(vp, ofd_to_id(fg), F_TRANSFER, &lf, F_OFD_LOCK, &context, NULL);
359 			(void)vnode_put(vp);
360 		}
361 	}
362 	return;
363 }
364 
365 bool
fg_sendable(struct fileglob * fg)366 fg_sendable(struct fileglob *fg)
367 {
368 	switch (FILEGLOB_DTYPE(fg)) {
369 	case DTYPE_VNODE:
370 	case DTYPE_SOCKET:
371 	case DTYPE_PIPE:
372 	case DTYPE_PSXSHM:
373 	case DTYPE_NETPOLICY:
374 		return (fg->fg_lflags & FG_CONFINED) == 0;
375 
376 	default:
377 		return false;
378 	}
379 }
380 
381 #pragma mark file descriptor table (static helpers)
382 
383 static void
procfdtbl_reservefd(struct proc * p,int fd)384 procfdtbl_reservefd(struct proc * p, int fd)
385 {
386 	p->p_fd.fd_ofiles[fd] = NULL;
387 	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
388 }
389 
390 void
procfdtbl_releasefd(struct proc * p,int fd,struct fileproc * fp)391 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
392 {
393 	if (fp != NULL) {
394 		p->p_fd.fd_ofiles[fd] = fp;
395 	}
396 	p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
397 	if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
398 		p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
399 		wakeup(&p->p_fd);
400 	}
401 }
402 
403 static void
procfdtbl_waitfd(struct proc * p,int fd)404 procfdtbl_waitfd(struct proc * p, int fd)
405 {
406 	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
407 	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
408 }
409 
410 static void
procfdtbl_clearfd(struct proc * p,int fd)411 procfdtbl_clearfd(struct proc * p, int fd)
412 {
413 	int waiting;
414 
415 	waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
416 	p->p_fd.fd_ofiles[fd] = NULL;
417 	p->p_fd.fd_ofileflags[fd] = 0;
418 	if (waiting == UF_RESVWAIT) {
419 		wakeup(&p->p_fd);
420 	}
421 }
422 
423 /*
424  * fdrelse
425  *
426  * Description:	Inline utility function to free an fd in a filedesc
427  *
428  * Parameters:	fdp				Pointer to filedesc fd lies in
429  *		fd				fd to free
430  *		reserv				fd should be reserved
431  *
432  * Returns:	void
433  *
434  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
435  *		the caller
436  */
437 void
fdrelse(struct proc * p,int fd)438 fdrelse(struct proc * p, int fd)
439 {
440 	struct filedesc *fdp = &p->p_fd;
441 	int nfd = 0;
442 
443 	if (fd < fdp->fd_freefile) {
444 		fdp->fd_freefile = fd;
445 	}
446 #if DIAGNOSTIC
447 	if (fd >= fdp->fd_afterlast) {
448 		panic("fdrelse: fd_afterlast inconsistent");
449 	}
450 #endif
451 	procfdtbl_clearfd(p, fd);
452 
453 	nfd = fdp->fd_afterlast;
454 	while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
455 	    !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
456 		nfd--;
457 	}
458 	fdp->fd_afterlast = nfd;
459 
460 #if CONFIG_PROC_RESOURCE_LIMITS
461 	fdp->fd_nfiles_open--;
462 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
463 }
464 
465 
466 /*
467  * finishdup
468  *
469  * Description:	Common code for dup, dup2, and fcntl(F_DUPFD).
470  *
471  * Parameters:	p				Process performing the dup
472  *		old				The fd to dup
473  *		new				The fd to dup it to
474  *		fp_flags			Flags to augment the new fp
475  *		retval				Pointer to the call return area
476  *
477  * Returns:	0				Success
478  *		EBADF
479  *		ENOMEM
480  *
481  * Implicit returns:
482  *		*retval (modified)		The new descriptor
483  *
484  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
485  *		the caller
486  *
487  * Notes:	This function may drop and reacquire this lock; it is unsafe
488  *		for a caller to assume that other state protected by the lock
489  *		has not been subsequently changed out from under it.
490  */
491 static int
finishdup(proc_t p,struct filedesc * fdp,int old,int new,fileproc_flags_t fp_flags,int32_t * retval)492 finishdup(proc_t p, struct filedesc *fdp, int old, int new,
493     fileproc_flags_t fp_flags, int32_t *retval)
494 {
495 	struct fileproc *nfp;
496 	struct fileproc *ofp;
497 #if CONFIG_MACF
498 	int error;
499 	kauth_cred_t cred;
500 #endif
501 
502 #if DIAGNOSTIC
503 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
504 #endif
505 	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
506 	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
507 		fdrelse(p, new);
508 		return EBADF;
509 	}
510 
511 #if CONFIG_MACF
512 	cred = kauth_cred_proc_ref(p);
513 	error = mac_file_check_dup(cred, ofp->fp_glob, new);
514 	kauth_cred_unref(&cred);
515 
516 	if (error) {
517 		fdrelse(p, new);
518 		return error;
519 	}
520 #endif
521 
522 	fg_ref(p, ofp->fp_glob);
523 
524 	proc_fdunlock(p);
525 
526 	nfp = fileproc_alloc_init();
527 
528 	if (fp_flags) {
529 		nfp->fp_flags |= fp_flags;
530 	}
531 	nfp->fp_glob = ofp->fp_glob;
532 
533 	proc_fdlock(p);
534 
535 #if DIAGNOSTIC
536 	if (fdp->fd_ofiles[new] != 0) {
537 		panic("finishdup: overwriting fd_ofiles with new %d", new);
538 	}
539 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
540 		panic("finishdup: unreserved fileflags with new %d", new);
541 	}
542 #endif
543 
544 	if (new >= fdp->fd_afterlast) {
545 		fdp->fd_afterlast = new + 1;
546 	}
547 	procfdtbl_releasefd(p, new, nfp);
548 	*retval = new;
549 	return 0;
550 }
551 
552 
553 #pragma mark file descriptor table (exported functions)
554 
555 void
proc_dirs_lock_shared(proc_t p)556 proc_dirs_lock_shared(proc_t p)
557 {
558 	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
559 }
560 
561 void
proc_dirs_unlock_shared(proc_t p)562 proc_dirs_unlock_shared(proc_t p)
563 {
564 	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
565 }
566 
567 void
proc_dirs_lock_exclusive(proc_t p)568 proc_dirs_lock_exclusive(proc_t p)
569 {
570 	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
571 }
572 
573 void
proc_dirs_unlock_exclusive(proc_t p)574 proc_dirs_unlock_exclusive(proc_t p)
575 {
576 	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
577 }
578 
579 /*
580  * proc_fdlock, proc_fdlock_spin
581  *
582  * Description:	Lock to control access to the per process struct fileproc
583  *		and struct filedesc
584  *
585  * Parameters:	p				Process to take the lock on
586  *
587  * Returns:	void
588  *
589  * Notes:	The lock is initialized in forkproc() and destroyed in
590  *		reap_child_process().
591  */
592 void
proc_fdlock(proc_t p)593 proc_fdlock(proc_t p)
594 {
595 	lck_mtx_lock(&p->p_fd.fd_lock);
596 }
597 
598 void
proc_fdlock_spin(proc_t p)599 proc_fdlock_spin(proc_t p)
600 {
601 	lck_mtx_lock_spin(&p->p_fd.fd_lock);
602 }
603 
604 void
proc_fdlock_assert(proc_t p,int assertflags)605 proc_fdlock_assert(proc_t p, int assertflags)
606 {
607 	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
608 }
609 
610 
611 /*
612  * proc_fdunlock
613  *
614  * Description:	Unlock the lock previously locked by a call to proc_fdlock()
615  *
616  * Parameters:	p				Process to drop the lock on
617  *
618  * Returns:	void
619  */
620 void
proc_fdunlock(proc_t p)621 proc_fdunlock(proc_t p)
622 {
623 	lck_mtx_unlock(&p->p_fd.fd_lock);
624 }
625 
626 bool
fdt_available_locked(proc_t p,int n)627 fdt_available_locked(proc_t p, int n)
628 {
629 	struct filedesc *fdp = &p->p_fd;
630 	struct fileproc **fpp;
631 	char *flags;
632 	int i;
633 	int lim = proc_limitgetcur_nofile(p);
634 
635 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
636 		return true;
637 	}
638 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
639 	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
640 	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
641 		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
642 			return true;
643 		}
644 	}
645 	return false;
646 }
647 
648 
649 struct fdt_iterator
fdt_next(proc_t p,int fd,bool only_settled)650 fdt_next(proc_t p, int fd, bool only_settled)
651 {
652 	struct fdt_iterator it;
653 	struct filedesc *fdp = &p->p_fd;
654 	struct fileproc *fp;
655 	int nfds = fdp->fd_afterlast;
656 
657 	while (++fd < nfds) {
658 		fp = fdp->fd_ofiles[fd];
659 		if (fp == NULL || fp->fp_glob == NULL) {
660 			continue;
661 		}
662 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
663 			continue;
664 		}
665 		it.fdti_fd = fd;
666 		it.fdti_fp = fp;
667 		return it;
668 	}
669 
670 	it.fdti_fd = nfds;
671 	it.fdti_fp = NULL;
672 	return it;
673 }
674 
675 struct fdt_iterator
fdt_prev(proc_t p,int fd,bool only_settled)676 fdt_prev(proc_t p, int fd, bool only_settled)
677 {
678 	struct fdt_iterator it;
679 	struct filedesc *fdp = &p->p_fd;
680 	struct fileproc *fp;
681 
682 	while (--fd >= 0) {
683 		fp = fdp->fd_ofiles[fd];
684 		if (fp == NULL || fp->fp_glob == NULL) {
685 			continue;
686 		}
687 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
688 			continue;
689 		}
690 		it.fdti_fd = fd;
691 		it.fdti_fp = fp;
692 		return it;
693 	}
694 
695 	it.fdti_fd = -1;
696 	it.fdti_fp = NULL;
697 	return it;
698 }
699 
700 void
fdt_init(proc_t p)701 fdt_init(proc_t p)
702 {
703 	struct filedesc *fdp = &p->p_fd;
704 
705 	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
706 	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
707 	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
708 	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
709 }
710 
711 void
fdt_destroy(proc_t p)712 fdt_destroy(proc_t p)
713 {
714 	struct filedesc *fdp = &p->p_fd;
715 
716 	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
717 	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
718 	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
719 	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
720 }
721 
722 void
fdt_exec(proc_t p,short posix_spawn_flags,thread_t thread,bool in_exec)723 fdt_exec(proc_t p, short posix_spawn_flags, thread_t thread, bool in_exec)
724 {
725 	struct filedesc *fdp = &p->p_fd;
726 	thread_t self = current_thread();
727 	struct uthread *ut = get_bsdthread_info(self);
728 	struct kqworkq *dealloc_kqwq = NULL;
729 
730 	/*
731 	 * If the current thread is bound as a workq/workloop
732 	 * servicing thread, we need to unbind it first.
733 	 */
734 	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
735 		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
736 	}
737 
738 	/*
739 	 * Deallocate the knotes for this process
740 	 * and mark the tables non-existent so
741 	 * subsequent kqueue closes go faster.
742 	 */
743 	knotes_dealloc(p);
744 	assert(fdp->fd_knlistsize == 0);
745 	assert(fdp->fd_knhashmask == 0);
746 
747 	proc_fdlock(p);
748 
749 	/* Set the P_LADVLOCK flag if the flag set on old proc */
750 	if (in_exec && (current_proc()->p_ladvflag & P_LADVLOCK)) {
751 		os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
752 	}
753 
754 	for (int i = fdp->fd_afterlast; i-- > 0;) {
755 		struct fileproc *fp = fdp->fd_ofiles[i];
756 		char *flagp = &fdp->fd_ofileflags[i];
757 		bool inherit_file = true;
758 
759 		if (fp == FILEPROC_NULL) {
760 			continue;
761 		}
762 
763 		/*
764 		 * no file descriptor should be in flux when in exec,
765 		 * because we stopped all other threads
766 		 */
767 		if (*flagp & ~UF_INHERIT) {
768 			panic("file %d/%p in flux during exec of %p", i, fp, p);
769 		}
770 
771 		if (fp->fp_flags & FP_CLOEXEC) {
772 			inherit_file = false;
773 		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
774 		    !(*flagp & UF_INHERIT)) {
775 			/*
776 			 * Reverse the usual semantics of file descriptor
777 			 * inheritance - all of them should be closed
778 			 * except files marked explicitly as "inherit" and
779 			 * not marked close-on-exec.
780 			 */
781 			inherit_file = false;
782 #if CONFIG_MACF
783 		} else if (mac_file_check_inherit(proc_ucred_unsafe(p), fp->fp_glob)) {
784 			inherit_file = false;
785 #endif
786 		}
787 
788 		*flagp = 0; /* clear UF_INHERIT */
789 
790 		if (!inherit_file) {
791 			fp_close_and_unlock(p, i, fp, 0);
792 			proc_fdlock(p);
793 		} else if (in_exec) {
794 			/* Transfer F_POSIX style lock to new proc */
795 			proc_fdunlock(p);
796 			fg_transfer_filelocks(p, fp->fp_glob, thread);
797 			proc_fdlock(p);
798 		}
799 	}
800 
801 	/* release the per-process workq kq */
802 	if (fdp->fd_wqkqueue) {
803 		dealloc_kqwq = fdp->fd_wqkqueue;
804 		fdp->fd_wqkqueue = NULL;
805 	}
806 
807 	proc_fdunlock(p);
808 
809 	/* Anything to free? */
810 	if (dealloc_kqwq) {
811 		kqworkq_dealloc(dealloc_kqwq);
812 	}
813 }
814 
815 
816 int
fdt_fork(struct filedesc * newfdp,proc_t p,vnode_t uth_cdir,bool in_exec)817 fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir, bool in_exec)
818 {
819 	struct filedesc *fdp = &p->p_fd;
820 	struct fileproc **ofiles;
821 	char *ofileflags;
822 	int n_files, afterlast, freefile;
823 	vnode_t v_dir;
824 #if CONFIG_PROC_RESOURCE_LIMITS
825 	int fd_nfiles_open = 0;
826 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
827 	proc_fdlock(p);
828 
829 	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
830 	newfdp->fd_cmask = fdp->fd_cmask;
831 #if CONFIG_PROC_RESOURCE_LIMITS
832 	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
833 	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
834 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
835 
836 	/*
837 	 * For both fd_cdir and fd_rdir make sure we get
838 	 * a valid reference... if we can't, than set
839 	 * set the pointer(s) to NULL in the child... this
840 	 * will keep us from using a non-referenced vp
841 	 * and allows us to do the vnode_rele only on
842 	 * a properly referenced vp
843 	 */
844 	if ((v_dir = fdp->fd_rdir)) {
845 		if (vnode_getwithref(v_dir) == 0) {
846 			if (vnode_ref(v_dir) == 0) {
847 				newfdp->fd_rdir = v_dir;
848 			}
849 			vnode_put(v_dir);
850 		}
851 		if (newfdp->fd_rdir == NULL) {
852 			/*
853 			 * We couldn't get a new reference on
854 			 * the chroot directory being
855 			 * inherited... this is fatal, since
856 			 * otherwise it would constitute an
857 			 * escape from a chroot environment by
858 			 * the new process.
859 			 */
860 			proc_fdunlock(p);
861 			return EPERM;
862 		}
863 	}
864 
865 	/*
866 	 * If we are running with per-thread current working directories,
867 	 * inherit the new current working directory from the current thread.
868 	 */
869 	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
870 		if (vnode_getwithref(v_dir) == 0) {
871 			if (vnode_ref(v_dir) == 0) {
872 				newfdp->fd_cdir = v_dir;
873 			}
874 			vnode_put(v_dir);
875 		}
876 		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
877 			/*
878 			 * we couldn't get a new reference on
879 			 * the current working directory being
880 			 * inherited... we might as well drop
881 			 * our reference from the parent also
882 			 * since the vnode has gone DEAD making
883 			 * it useless... by dropping it we'll
884 			 * be that much closer to recycling it
885 			 */
886 			vnode_rele(fdp->fd_cdir);
887 			fdp->fd_cdir = NULL;
888 		}
889 	}
890 
891 	/*
892 	 * If the number of open files fits in the internal arrays
893 	 * of the open file structure, use them, otherwise allocate
894 	 * additional memory for the number of descriptors currently
895 	 * in use.
896 	 */
897 	afterlast = fdp->fd_afterlast;
898 	freefile = fdp->fd_freefile;
899 	if (afterlast <= NDFILE) {
900 		n_files = NDFILE;
901 	} else {
902 		n_files = roundup(afterlast, NDEXTENT);
903 	}
904 
905 	proc_fdunlock(p);
906 
907 	ofiles = kalloc_type(struct fileproc *, n_files, Z_WAITOK | Z_ZERO);
908 	ofileflags = kalloc_data(n_files, Z_WAITOK | Z_ZERO);
909 	if (ofiles == NULL || ofileflags == NULL) {
910 		kfree_type(struct fileproc *, n_files, ofiles);
911 		kfree_data(ofileflags, n_files);
912 		if (newfdp->fd_cdir) {
913 			vnode_rele(newfdp->fd_cdir);
914 			newfdp->fd_cdir = NULL;
915 		}
916 		if (newfdp->fd_rdir) {
917 			vnode_rele(newfdp->fd_rdir);
918 			newfdp->fd_rdir = NULL;
919 		}
920 		return ENOMEM;
921 	}
922 
923 	proc_fdlock(p);
924 
925 	for (int i = afterlast; i-- > 0;) {
926 		struct fileproc *ofp, *nfp;
927 		char flags;
928 
929 		ofp = fdp->fd_ofiles[i];
930 		flags = fdp->fd_ofileflags[i];
931 
932 		if (ofp == NULL ||
933 		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
934 		    ((ofp->fp_flags & FP_CLOFORK) && !in_exec) ||
935 		    ((ofp->fp_flags & FP_CLOEXEC) && in_exec) ||
936 		    (flags & UF_RESERVED)) {
937 			if (i + 1 == afterlast) {
938 				afterlast = i;
939 			}
940 			if (i < freefile) {
941 				freefile = i;
942 			}
943 
944 			continue;
945 		}
946 
947 		nfp = fileproc_alloc_init();
948 		nfp->fp_glob = ofp->fp_glob;
949 		if (in_exec) {
950 			nfp->fp_flags = (ofp->fp_flags & (FP_CLOEXEC | FP_CLOFORK));
951 			if (ofp->fp_guard_attrs) {
952 				guarded_fileproc_copy_guard(ofp, nfp);
953 			}
954 		} else {
955 			assert(ofp->fp_guard_attrs == 0);
956 			nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
957 		}
958 		fg_ref(p, nfp->fp_glob);
959 
960 		ofiles[i] = nfp;
961 #if CONFIG_PROC_RESOURCE_LIMITS
962 		fd_nfiles_open++;
963 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
964 	}
965 
966 	proc_fdunlock(p);
967 
968 	newfdp->fd_ofiles = ofiles;
969 	newfdp->fd_ofileflags = ofileflags;
970 	newfdp->fd_nfiles = n_files;
971 	newfdp->fd_afterlast = afterlast;
972 	newfdp->fd_freefile = freefile;
973 
974 #if CONFIG_PROC_RESOURCE_LIMITS
975 	newfdp->fd_nfiles_open = fd_nfiles_open;
976 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
977 
978 	return 0;
979 }
980 
981 void
fdt_invalidate(proc_t p)982 fdt_invalidate(proc_t p)
983 {
984 	struct filedesc *fdp = &p->p_fd;
985 	struct fileproc *fp, **ofiles;
986 	char *ofileflags;
987 	struct kqworkq *kqwq = NULL;
988 	vnode_t vn1 = NULL, vn2 = NULL;
989 	struct kqwllist *kqhash = NULL;
990 	u_long kqhashmask = 0;
991 	int n_files = 0;
992 
993 	/*
994 	 * deallocate all the knotes up front and claim empty
995 	 * tables to make any subsequent kqueue closes faster.
996 	 */
997 	knotes_dealloc(p);
998 	assert(fdp->fd_knlistsize == 0);
999 	assert(fdp->fd_knhashmask == 0);
1000 
1001 	/*
1002 	 * dealloc all workloops that have outstanding retains
1003 	 * when created with scheduling parameters.
1004 	 */
1005 	kqworkloops_dealloc(p);
1006 
1007 	proc_fdlock(p);
1008 
1009 	/* close file descriptors */
1010 	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
1011 		for (int i = fdp->fd_afterlast; i-- > 0;) {
1012 			if ((fp = fdp->fd_ofiles[i]) != NULL) {
1013 				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
1014 					panic("fdfree: found fp with UF_RESERVED");
1015 				}
1016 				fp_close_and_unlock(p, i, fp, 0);
1017 				proc_fdlock(p);
1018 			}
1019 		}
1020 	}
1021 
1022 	n_files = fdp->fd_nfiles;
1023 	ofileflags = fdp->fd_ofileflags;
1024 	ofiles = fdp->fd_ofiles;
1025 	kqwq = fdp->fd_wqkqueue;
1026 	vn1 = fdp->fd_cdir;
1027 	vn2 = fdp->fd_rdir;
1028 
1029 	fdp->fd_ofileflags = NULL;
1030 	fdp->fd_ofiles = NULL;
1031 	fdp->fd_nfiles = 0;
1032 	fdp->fd_wqkqueue = NULL;
1033 	fdp->fd_cdir = NULL;
1034 	fdp->fd_rdir = NULL;
1035 
1036 	proc_fdunlock(p);
1037 
1038 	lck_mtx_lock(&fdp->fd_kqhashlock);
1039 
1040 	kqhash = fdp->fd_kqhash;
1041 	kqhashmask = fdp->fd_kqhashmask;
1042 
1043 	fdp->fd_kqhash = 0;
1044 	fdp->fd_kqhashmask = 0;
1045 
1046 	lck_mtx_unlock(&fdp->fd_kqhashlock);
1047 
1048 	kfree_type(struct fileproc *, n_files, ofiles);
1049 	kfree_data(ofileflags, n_files);
1050 
1051 	if (kqwq) {
1052 		kqworkq_dealloc(kqwq);
1053 	}
1054 	if (vn1) {
1055 		vnode_rele(vn1);
1056 	}
1057 	if (vn2) {
1058 		vnode_rele(vn2);
1059 	}
1060 	if (kqhash) {
1061 		for (uint32_t i = 0; i <= kqhashmask; i++) {
1062 			assert(LIST_EMPTY(&kqhash[i]));
1063 		}
1064 		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
1065 	}
1066 }
1067 
1068 
1069 struct fileproc *
fileproc_alloc_init(void)1070 fileproc_alloc_init(void)
1071 {
1072 	struct fileproc *fp;
1073 
1074 	fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1075 	os_ref_init(&fp->fp_iocount, &f_refgrp);
1076 	return fp;
1077 }
1078 
1079 
1080 void
fileproc_free(struct fileproc * fp)1081 fileproc_free(struct fileproc *fp)
1082 {
1083 	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
1084 #if DEVELOPMENT || DEBUG
1085 	if (0 != refc) {
1086 		panic("%s: pid %d refc: %u != 0",
1087 		    __func__, proc_pid(current_proc()), refc);
1088 	}
1089 #endif
1090 	if (fp->fp_guard_attrs) {
1091 		guarded_fileproc_unguard(fp);
1092 	}
1093 	assert(fp->fp_wset == NULL);
1094 	zfree_id(ZONE_ID_FILEPROC, fp);
1095 }
1096 
1097 
/*
 * fdexpand
 *
 * Running total of per process open file table expansions: fdalloc()
 * adds one each time it installs a larger table.
 *
 * XXX The counter is global; a per process count would likely be of
 *     more use.
 */
int fdexpand;
1105 
#if CONFIG_PROC_RESOURCE_LIMITS
/*
 * fd_check_limit_exceeded
 *
 * Description:	Compare the number of open descriptors (fd_nfiles_open)
 *		with the soft and hard notification limits of the filedesc.
 *		For a limit that is configured (non-zero), exceeded, and for
 *		which the matching fd_above_*_limit_notify() check is still
 *		false, send the notification and set the proc resource AST
 *		on the current thread.  The soft limit is tested first and
 *		at most one of the two limits is handled per call.
 *
 * Parameters:	fdp				File descriptor table to check
 *
 * Locks:	Should be called only with the proc_fdlock held.
 */
void
fd_check_limit_exceeded(struct filedesc *fdp)
{
#if DIAGNOSTIC
	/*
	 * No proc pointer is in scope in this function, so assert on the
	 * filedesc's own fd_lock instead of going through proc_fdlock_assert().
	 * NOTE(review): assumes fd_lock is the mutex behind proc_fdlock();
	 * confirm against the proc_fdlock_assert() definition.
	 */
	LCK_MTX_ASSERT(&fdp->fd_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
	    (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
		fd_above_soft_limit_send_notification(fdp);
		act_set_astproc_resource(current_thread());
	} else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
	    (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
		fd_above_hard_limit_send_notification(fdp);
		act_set_astproc_resource(current_thread());
	}
}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
1127 
1128 /*
1129  * fdalloc
1130  *
1131  * Description:	Allocate a file descriptor for the process.
1132  *
1133  * Parameters:	p				Process to allocate the fd in
1134  *		want				The fd we would prefer to get
1135  *		result				Pointer to fd we got
1136  *
1137  * Returns:	0				Success
1138  *		EMFILE
1139  *		ENOMEM
1140  *
1141  * Implicit returns:
1142  *		*result (modified)		The fd which was allocated
1143  */
1144 int
fdalloc(proc_t p,int want,int * result)1145 fdalloc(proc_t p, int want, int *result)
1146 {
1147 	struct filedesc *fdp = &p->p_fd;
1148 	int i;
1149 	int last, numfiles, oldnfiles;
1150 	struct fileproc **newofiles;
1151 	char *newofileflags;
1152 	int lim = proc_limitgetcur_nofile(p);
1153 
1154 	/*
1155 	 * Search for a free descriptor starting at the higher
1156 	 * of want or fd_freefile.  If that fails, consider
1157 	 * expanding the ofile array.
1158 	 */
1159 #if DIAGNOSTIC
1160 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1161 #endif
1162 
1163 	for (;;) {
1164 		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
1165 		if ((i = want) < fdp->fd_freefile) {
1166 			i = fdp->fd_freefile;
1167 		}
1168 		for (; i < last; i++) {
1169 			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
1170 				procfdtbl_reservefd(p, i);
1171 				if (i >= fdp->fd_afterlast) {
1172 					fdp->fd_afterlast = i + 1;
1173 				}
1174 				if (want <= fdp->fd_freefile) {
1175 					fdp->fd_freefile = i;
1176 				}
1177 				*result = i;
1178 #if CONFIG_PROC_RESOURCE_LIMITS
1179 				fdp->fd_nfiles_open++;
1180 				fd_check_limit_exceeded(fdp);
1181 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1182 				return 0;
1183 			}
1184 		}
1185 
1186 		/*
1187 		 * No space in current array.  Expand?
1188 		 */
1189 		if ((rlim_t)fdp->fd_nfiles >= lim) {
1190 			return EMFILE;
1191 		}
1192 		if (fdp->fd_nfiles < NDEXTENT) {
1193 			numfiles = NDEXTENT;
1194 		} else {
1195 			numfiles = 2 * fdp->fd_nfiles;
1196 		}
1197 		/* Enforce lim */
1198 		if ((rlim_t)numfiles > lim) {
1199 			numfiles = (int)lim;
1200 		}
1201 		proc_fdunlock(p);
1202 		newofiles = kalloc_type(struct fileproc *, numfiles, Z_WAITOK | Z_ZERO);
1203 		newofileflags = kalloc_data(numfiles, Z_WAITOK | Z_ZERO);
1204 		proc_fdlock(p);
1205 		if (newofileflags == NULL || newofiles == NULL) {
1206 			kfree_type(struct fileproc *, numfiles, newofiles);
1207 			kfree_data(newofileflags, numfiles);
1208 			return ENOMEM;
1209 		}
1210 		if (fdp->fd_nfiles >= numfiles) {
1211 			kfree_type(struct fileproc *, numfiles, newofiles);
1212 			kfree_data(newofileflags, numfiles);
1213 			continue;
1214 		}
1215 
1216 		/*
1217 		 * Copy the existing ofile and ofileflags arrays
1218 		 * and zero the new portion of each array.
1219 		 */
1220 		oldnfiles = fdp->fd_nfiles;
1221 		memcpy(newofiles, fdp->fd_ofiles,
1222 		    oldnfiles * sizeof(*fdp->fd_ofiles));
1223 		memcpy(newofileflags, fdp->fd_ofileflags, oldnfiles);
1224 
1225 		kfree_type(struct fileproc *, oldnfiles, fdp->fd_ofiles);
1226 		kfree_data(fdp->fd_ofileflags, oldnfiles);
1227 		fdp->fd_ofiles = newofiles;
1228 		fdp->fd_ofileflags = newofileflags;
1229 		fdp->fd_nfiles = numfiles;
1230 		fdexpand++;
1231 	}
1232 }
1233 
1234 
1235 #pragma mark fileprocs
1236 
1237 void
fileproc_modify_vflags(struct fileproc * fp,fileproc_vflags_t vflags,boolean_t clearflags)1238 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1239 {
1240 	if (clearflags) {
1241 		os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1242 	} else {
1243 		os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1244 	}
1245 }
1246 
1247 fileproc_vflags_t
fileproc_get_vflags(struct fileproc * fp)1248 fileproc_get_vflags(struct fileproc *fp)
1249 {
1250 	return os_atomic_load(&fp->fp_vflags, relaxed);
1251 }
1252 
1253 /*
1254  * falloc_withinit
1255  *
1256  * Create a new open file structure and allocate
1257  * a file descriptor for the process that refers to it.
1258  *
1259  * Returns:	0			Success
1260  *
1261  * Description:	Allocate an entry in the per process open file table and
1262  *		return the corresponding fileproc and fd.
1263  *
1264  * Parameters:	p				The process in whose open file
1265  *						table the fd is to be allocated
1266  *		resultfp			Pointer to fileproc pointer
1267  *						return area
1268  *		resultfd			Pointer to fd return area
1269  *		ctx				VFS context
1270  *		fp_zalloc			fileproc allocator to use
1271  *		crarg				allocator args
1272  *
1273  * Returns:	0				Success
1274  *		ENFILE				Too many open files in system
1275  *		fdalloc:EMFILE			Too many open files in process
1276  *		fdalloc:ENOMEM			M_OFILETABL zone exhausted
1277  *		ENOMEM				fp_zone or fg_zone zone
1278  *						exhausted
1279  *
1280  * Implicit returns:
1281  *		*resultfd (modified)		Returned fileproc pointer
1282  *		*resultfd (modified)		Returned fd
1283  *
1284  * Notes:	This function takes separate process and context arguments
1285  *		solely to support kern_exec.c; otherwise, it would take
1286  *		neither, and use the vfs_context_current() routine internally.
1287  */
1288 int
falloc_withinit(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx,fp_initfn_t fp_init,void * initarg)1289 falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
1290     vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
1291 {
1292 	struct fileproc *fp;
1293 	struct fileglob *fg;
1294 	int error, nfd;
1295 #if CONFIG_MACF
1296 	kauth_cred_t cred;
1297 #endif
1298 
1299 	/* Make sure we don't go beyond the system-wide limit */
1300 	if (nfiles >= maxfiles) {
1301 		tablefull("file");
1302 		return ENFILE;
1303 	}
1304 
1305 	proc_fdlock(p);
1306 
1307 	/* fdalloc will make sure the process stays below per-process limit */
1308 	if ((error = fdalloc(p, 0, &nfd))) {
1309 		proc_fdunlock(p);
1310 		return error;
1311 	}
1312 
1313 #if CONFIG_MACF
1314 	cred = kauth_cred_proc_ref(p);
1315 	error = mac_file_check_create(cred);
1316 	kauth_cred_unref(&cred);
1317 	if (error) {
1318 		proc_fdunlock(p);
1319 		return error;
1320 	}
1321 #endif
1322 
1323 	/*
1324 	 * Allocate a new file descriptor.
1325 	 * If the process has file descriptor zero open, add to the list
1326 	 * of open files at that point, otherwise put it at the front of
1327 	 * the list of open files.
1328 	 */
1329 	proc_fdunlock(p);
1330 
1331 	fp = fileproc_alloc_init();
1332 	if (fp_init) {
1333 		fp_init(fp, initarg);
1334 	}
1335 
1336 	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
1337 	lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
1338 
1339 	os_ref_retain_locked(&fp->fp_iocount);
1340 	os_ref_init_raw(&fg->fg_count, &f_refgrp);
1341 	fg->fg_ops = &uninitops;
1342 	fp->fp_glob = fg;
1343 
1344 	kauth_cred_ref(ctx->vc_ucred);
1345 
1346 	fp->f_cred = ctx->vc_ucred;
1347 
1348 	os_atomic_inc(&nfiles, relaxed);
1349 
1350 	proc_fdlock(p);
1351 
1352 	p->p_fd.fd_ofiles[nfd] = fp;
1353 
1354 	proc_fdunlock(p);
1355 
1356 	if (resultfp) {
1357 		*resultfp = fp;
1358 	}
1359 	if (resultfd) {
1360 		*resultfd = nfd;
1361 	}
1362 
1363 	return 0;
1364 }
1365 
1366 int
falloc(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx)1367 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
1368 {
1369 	return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
1370 }
1371 
1372 
1373 /*
1374  * fp_free
1375  *
1376  * Description:	Release the fd and free the fileproc associated with the fd
1377  *		in the per process open file table of the specified process;
1378  *		these values must correspond.
1379  *
1380  * Parameters:	p				Process containing fd
1381  *		fd				fd to be released
1382  *		fp				fileproc to be freed
1383  */
1384 void
fp_free(proc_t p,int fd,struct fileproc * fp)1385 fp_free(proc_t p, int fd, struct fileproc * fp)
1386 {
1387 	proc_fdlock_spin(p);
1388 	fdrelse(p, fd);
1389 	proc_fdunlock(p);
1390 
1391 	fg_free(fp->fp_glob);
1392 	os_ref_release_live(&fp->fp_iocount);
1393 	fileproc_free(fp);
1394 }
1395 
1396 
1397 struct fileproc *
fp_get_noref_locked(proc_t p,int fd)1398 fp_get_noref_locked(proc_t p, int fd)
1399 {
1400 	struct filedesc *fdp = &p->p_fd;
1401 	struct fileproc *fp;
1402 
1403 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1404 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1405 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1406 		return NULL;
1407 	}
1408 
1409 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1410 	return fp;
1411 }
1412 
1413 struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p,int fd)1414 fp_get_noref_locked_with_iocount(proc_t p, int fd)
1415 {
1416 	struct filedesc *fdp = &p->p_fd;
1417 	struct fileproc *fp = NULL;
1418 
1419 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1420 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1421 	    os_ref_get_count(&fp->fp_iocount) <= 1 ||
1422 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1423 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
1424 		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
1425 		    __func__, fd, fp);
1426 	}
1427 
1428 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1429 	return fp;
1430 }
1431 
1432 
1433 /*
1434  * fp_lookup
1435  *
1436  * Description:	Get fileproc pointer for a given fd from the per process
1437  *		open file table of the specified process and if successful,
1438  *		increment the fp_iocount
1439  *
1440  * Parameters:	p				Process in which fd lives
1441  *		fd				fd to get information for
1442  *		resultfp			Pointer to result fileproc
1443  *						pointer area, or 0 if none
1444  *		locked				!0 if the caller holds the
1445  *						proc_fdlock, 0 otherwise
1446  *
1447  * Returns:	0			Success
1448  *		EBADF			Bad file descriptor
1449  *
1450  * Implicit returns:
1451  *		*resultfp (modified)		Fileproc pointer
1452  *
1453  * Locks:	If the argument 'locked' is non-zero, then the caller is
1454  *		expected to have taken and held the proc_fdlock; if it is
1455  *		zero, than this routine internally takes and drops this lock.
1456  */
1457 int
fp_lookup(proc_t p,int fd,struct fileproc ** resultfp,int locked)1458 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1459 {
1460 	struct filedesc *fdp = &p->p_fd;
1461 	struct fileproc *fp;
1462 
1463 	if (!locked) {
1464 		proc_fdlock_spin(p);
1465 	}
1466 	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1467 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1468 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1469 		if (!locked) {
1470 			proc_fdunlock(p);
1471 		}
1472 		return EBADF;
1473 	}
1474 
1475 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1476 	os_ref_retain_locked(&fp->fp_iocount);
1477 
1478 	if (resultfp) {
1479 		*resultfp = fp;
1480 	}
1481 	if (!locked) {
1482 		proc_fdunlock(p);
1483 	}
1484 
1485 	return 0;
1486 }
1487 
1488 
1489 int
fp_get_ftype(proc_t p,int fd,file_type_t ftype,int err,struct fileproc ** fpp)1490 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1491 {
1492 	struct filedesc *fdp = &p->p_fd;
1493 	struct fileproc *fp;
1494 
1495 	proc_fdlock_spin(p);
1496 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1497 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1498 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1499 		proc_fdunlock(p);
1500 		return EBADF;
1501 	}
1502 
1503 	if (fp->f_type != ftype) {
1504 		proc_fdunlock(p);
1505 		return err;
1506 	}
1507 
1508 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1509 	os_ref_retain_locked(&fp->fp_iocount);
1510 	proc_fdunlock(p);
1511 
1512 	*fpp = fp;
1513 	return 0;
1514 }
1515 
1516 
1517 /*
1518  * fp_drop
1519  *
1520  * Description:	Drop the I/O reference previously taken by calling fp_lookup
1521  *		et. al.
1522  *
1523  * Parameters:	p				Process in which the fd lives
1524  *		fd				fd associated with the fileproc
1525  *		fp				fileproc on which to set the
1526  *						flag and drop the reference
1527  *		locked				flag to internally take and
1528  *						drop proc_fdlock if it is not
1529  *						already held by the caller
1530  *
1531  * Returns:	0				Success
1532  *		EBADF				Bad file descriptor
1533  *
1534  * Locks:	This function internally takes and drops the proc_fdlock for
1535  *		the supplied process if 'locked' is non-zero, and assumes that
1536  *		the caller already holds this lock if 'locked' is non-zero.
1537  *
1538  * Notes:	The fileproc must correspond to the fd in the supplied proc
1539  */
1540 int
fp_drop(proc_t p,int fd,struct fileproc * fp,int locked)1541 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
1542 {
1543 	struct filedesc *fdp = &p->p_fd;
1544 	int     needwakeup = 0;
1545 
1546 	if (!locked) {
1547 		proc_fdlock_spin(p);
1548 	}
1549 	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
1550 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1551 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1552 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
1553 		if (!locked) {
1554 			proc_fdunlock(p);
1555 		}
1556 		return EBADF;
1557 	}
1558 
1559 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
1560 		if (fp->fp_flags & FP_SELCONFLICT) {
1561 			fp->fp_flags &= ~FP_SELCONFLICT;
1562 		}
1563 
1564 		if (fdp->fd_fpdrainwait) {
1565 			fdp->fd_fpdrainwait = 0;
1566 			needwakeup = 1;
1567 		}
1568 	}
1569 	if (!locked) {
1570 		proc_fdunlock(p);
1571 	}
1572 	if (needwakeup) {
1573 		wakeup(&fdp->fd_fpdrainwait);
1574 	}
1575 
1576 	return 0;
1577 }
1578 
1579 
1580 /*
1581  * fileproc_drain
1582  *
1583  * Description:	Drain out pending I/O operations
1584  *
1585  * Parameters:	p				Process closing this file
1586  *		fp				fileproc struct for the open
1587  *						instance on the file
1588  *
1589  * Returns:	void
1590  *
1591  * Locks:	Assumes the caller holds the proc_fdlock
1592  *
1593  * Notes:	For character devices, this occurs on the last close of the
1594  *		device; for all other file descriptors, this occurs on each
1595  *		close to prevent fd's from being closed out from under
1596  *		operations currently in progress and blocked
1597  *
1598  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
1599  *		regarding their use and interaction with this function.
1600  */
1601 static void
fileproc_drain(proc_t p,struct fileproc * fp)1602 fileproc_drain(proc_t p, struct fileproc * fp)
1603 {
1604 	struct filedesc *fdp = &p->p_fd;
1605 	struct vfs_context context;
1606 	thread_t thread;
1607 	bool is_current_proc;
1608 
1609 	is_current_proc = (p == current_proc());
1610 
1611 	if (!is_current_proc) {
1612 		proc_lock(p);
1613 		thread = proc_thread(p); /* XXX */
1614 		thread_reference(thread);
1615 		proc_unlock(p);
1616 	} else {
1617 		thread = current_thread();
1618 	}
1619 
1620 	context.vc_thread = thread;
1621 	context.vc_ucred = fp->fp_glob->fg_cred;
1622 
1623 	/* Set the vflag for drain */
1624 	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
1625 
1626 	while (os_ref_get_count(&fp->fp_iocount) > 1) {
1627 		lck_mtx_convert_spin(&fdp->fd_lock);
1628 
1629 		fo_drain(fp, &context);
1630 		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
1631 			struct select_set *selset;
1632 
1633 			if (fp->fp_guard_attrs) {
1634 				selset = fp->fp_guard->fpg_wset;
1635 			} else {
1636 				selset = fp->fp_wset;
1637 			}
1638 			if (waitq_wakeup64_all(selset, NO_EVENT64,
1639 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
1640 				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
1641 				    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
1642 			}
1643 		}
1644 		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1645 			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
1646 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
1647 				panic("bad select_conflict_queue");
1648 			}
1649 		}
1650 		fdp->fd_fpdrainwait = 1;
1651 		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
1652 	}
1653 #if DIAGNOSTIC
1654 	if ((fp->fp_flags & FP_INSELECT) != 0) {
1655 		panic("FP_INSELECT set on drained fp");
1656 	}
1657 #endif
1658 	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1659 		fp->fp_flags &= ~FP_SELCONFLICT;
1660 	}
1661 
1662 	if (!is_current_proc) {
1663 		thread_deallocate(thread);
1664 	}
1665 }
1666 
1667 
1668 int
fp_close_and_unlock(proc_t p,int fd,struct fileproc * fp,int flags)1669 fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
1670 {
1671 	struct filedesc *fdp = &p->p_fd;
1672 	struct fileglob *fg = fp->fp_glob;
1673 #if CONFIG_MACF
1674 	kauth_cred_t cred;
1675 #endif
1676 
1677 #if DIAGNOSTIC
1678 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1679 #endif
1680 
1681 	/*
1682 	 * Keep most people from finding the filedesc while we are closing it.
1683 	 *
1684 	 * Callers are:
1685 	 *
1686 	 * - dup2() which always waits for UF_RESERVED to clear
1687 	 *
1688 	 * - close/guarded_close/... who will fail the fileproc lookup if
1689 	 *   UF_RESERVED is set,
1690 	 *
1691 	 * - fdexec()/fdfree() who only run once all threads in the proc
1692 	 *   are properly canceled, hence no fileproc in this proc should
1693 	 *   be in flux.
1694 	 *
1695 	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
1696 	 *
1697 	 * Callers of fp_get_noref_locked_with_iocount() can still find
1698 	 * this entry so that they can drop their I/O reference despite
1699 	 * not having remembered the fileproc pointer (namely select() and
1700 	 * file_drop()).
1701 	 */
1702 	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
1703 		panic("%s: called with fileproc in flux (%d/:%p)",
1704 		    __func__, fd, fp);
1705 	}
1706 	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
1707 
1708 	if ((fp->fp_flags & FP_AIOISSUED) ||
1709 #if CONFIG_MACF
1710 	    (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
1711 #else
1712 	    kauth_authorize_fileop_has_listeners()
1713 #endif
1714 	    ) {
1715 		proc_fdunlock(p);
1716 
1717 		if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1718 			/*
1719 			 * call out to allow 3rd party notification of close.
1720 			 * Ignore result of kauth_authorize_fileop call.
1721 			 */
1722 #if CONFIG_MACF
1723 			cred = kauth_cred_proc_ref(p);
1724 			mac_file_notify_close(cred, fp->fp_glob);
1725 			kauth_cred_unref(&cred);
1726 #endif
1727 
1728 			if (kauth_authorize_fileop_has_listeners() &&
1729 			    vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
1730 				u_int   fileop_flags = 0;
1731 				if (fg->fg_flag & FWASWRITTEN) {
1732 					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
1733 				}
1734 				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
1735 				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);
1736 
1737 				vnode_put((vnode_t)fg_get_data(fg));
1738 			}
1739 		}
1740 
1741 		if (fp->fp_flags & FP_AIOISSUED) {
1742 			/*
1743 			 * cancel all async IO requests that can be cancelled.
1744 			 */
1745 			_aio_close( p, fd );
1746 		}
1747 
1748 		proc_fdlock(p);
1749 	}
1750 
1751 	if (fd < fdp->fd_knlistsize) {
1752 		knote_fdclose(p, fd);
1753 	}
1754 
1755 	fileproc_drain(p, fp);
1756 
1757 	if (flags & FD_DUP2RESV) {
1758 		fdp->fd_ofiles[fd] = NULL;
1759 		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
1760 	} else {
1761 		fdrelse(p, fd);
1762 	}
1763 
1764 	proc_fdunlock(p);
1765 
1766 	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
1767 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
1768 		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
1769 	}
1770 
1771 	fileproc_free(fp);
1772 
1773 	return fg_drop(p, fg);
1774 }
1775 
1776 /*
1777  * dupfdopen
1778  *
1779  * Description:	Duplicate the specified descriptor to a free descriptor;
1780  *		this is the second half of fdopen(), above.
1781  *
1782  * Parameters:	p				current process pointer
1783  *		indx				fd to dup to
1784  *		dfd				fd to dup from
1785  *		mode				mode to set on new fd
1786  *		error				command code
1787  *
1788  * Returns:	0				Success
1789  *		EBADF				Source fd is bad
1790  *		EACCES				Requested mode not allowed
1791  *		!0				'error', if not ENODEV or
1792  *						ENXIO
1793  *
1794  * Notes:	XXX This is not thread safe; see fdopen() above
1795  */
1796 int
dupfdopen(proc_t p,int indx,int dfd,int flags,int error)1797 dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
1798 {
1799 	struct filedesc *fdp = &p->p_fd;
1800 	struct fileproc *wfp;
1801 	struct fileproc *fp;
1802 #if CONFIG_MACF
1803 	int myerror;
1804 #endif
1805 
1806 	/*
1807 	 * If the to-be-dup'd fd number is greater than the allowed number
1808 	 * of file descriptors, or the fd to be dup'd has already been
1809 	 * closed, reject.  Note, check for new == old is necessary as
1810 	 * falloc could allocate an already closed to-be-dup'd descriptor
1811 	 * as the new descriptor.
1812 	 */
1813 	proc_fdlock(p);
1814 
1815 	fp = fdp->fd_ofiles[indx];
1816 	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
1817 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
1818 	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
1819 		proc_fdunlock(p);
1820 		return EBADF;
1821 	}
1822 #if CONFIG_MACF
1823 	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
1824 	if (myerror) {
1825 		proc_fdunlock(p);
1826 		return myerror;
1827 	}
1828 #endif
1829 	/*
1830 	 * There are two cases of interest here.
1831 	 *
1832 	 * For ENODEV simply dup (dfd) to file descriptor
1833 	 * (indx) and return.
1834 	 *
1835 	 * For ENXIO steal away the file structure from (dfd) and
1836 	 * store it in (indx).  (dfd) is effectively closed by
1837 	 * this operation.
1838 	 *
1839 	 * Any other error code is just returned.
1840 	 */
1841 	switch (error) {
1842 	case ENODEV:
1843 		if (fp_isguarded(wfp, GUARD_DUP)) {
1844 			proc_fdunlock(p);
1845 			return EPERM;
1846 		}
1847 
1848 		/*
1849 		 * Check that the mode the file is being opened for is a
1850 		 * subset of the mode of the existing descriptor.
1851 		 */
1852 		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1853 			proc_fdunlock(p);
1854 			return EACCES;
1855 		}
1856 		if (indx >= fdp->fd_afterlast) {
1857 			fdp->fd_afterlast = indx + 1;
1858 		}
1859 
1860 		if (fp->fp_glob) {
1861 			fg_free(fp->fp_glob);
1862 		}
1863 		fg_ref(p, wfp->fp_glob);
1864 		fp->fp_glob = wfp->fp_glob;
1865 		/*
1866 		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
1867 		 * unlike dup(), dup2() or fcntl(F_DUPFD).
1868 		 *
1869 		 * open1() already handled O_CLO{EXEC,FORK}
1870 		 */
1871 		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));
1872 
1873 		procfdtbl_releasefd(p, indx, NULL);
1874 		fp_drop(p, indx, fp, 1);
1875 		proc_fdunlock(p);
1876 		return 0;
1877 
1878 	default:
1879 		proc_fdunlock(p);
1880 		return error;
1881 	}
1882 	/* NOTREACHED */
1883 }
1884 
1885 
1886 #pragma mark KPIS (sys/file.h)
1887 
1888 /*
1889  * fg_get_vnode
1890  *
1891  * Description:	Return vnode associated with the file structure, if
1892  *		any.  The lifetime of the returned vnode is bound to
1893  *		the lifetime of the file structure.
1894  *
1895  * Parameters:	fg				Pointer to fileglob to
1896  *						inspect
1897  *
1898  * Returns:	vnode_t
1899  */
1900 vnode_t
fg_get_vnode(struct fileglob * fg)1901 fg_get_vnode(struct fileglob *fg)
1902 {
1903 	if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1904 		return (vnode_t)fg_get_data(fg);
1905 	} else {
1906 		return NULL;
1907 	}
1908 }
1909 
1910 
1911 /*
1912  * fp_getfvp
1913  *
1914  * Description:	Get fileproc and vnode pointer for a given fd from the per
1915  *		process open file table of the specified process, and if
1916  *		successful, increment the fp_iocount
1917  *
1918  * Parameters:	p				Process in which fd lives
1919  *		fd				fd to get information for
1920  *		resultfp			Pointer to result fileproc
1921  *						pointer area, or 0 if none
1922  *		resultvp			Pointer to result vnode pointer
1923  *						area, or 0 if none
1924  *
1925  * Returns:	0				Success
1926  *		EBADF				Bad file descriptor
1927  *		ENOTSUP				fd does not refer to a vnode
1928  *
1929  * Implicit returns:
1930  *		*resultfp (modified)		Fileproc pointer
1931  *		*resultvp (modified)		vnode pointer
1932  *
1933  * Notes:	The resultfp and resultvp fields are optional, and may be
1934  *		independently specified as NULL to skip returning information
1935  *
1936  * Locks:	Internally takes and releases proc_fdlock
1937  */
1938 int
fp_getfvp(proc_t p,int fd,struct fileproc ** resultfp,struct vnode ** resultvp)1939 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1940 {
1941 	struct fileproc *fp;
1942 	int error;
1943 
1944 	error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1945 	if (error == 0) {
1946 		if (resultfp) {
1947 			*resultfp = fp;
1948 		}
1949 		if (resultvp) {
1950 			*resultvp = (struct vnode *)fp_get_data(fp);
1951 		}
1952 	}
1953 
1954 	return error;
1955 }
1956 
1957 
1958 /*
1959  * fp_get_pipe_id
1960  *
1961  * Description:	Get pipe id for a given fd from the per process open file table
1962  *		of the specified process.
1963  *
1964  * Parameters:	p				Process in which fd lives
1965  *		fd				fd to get information for
1966  *		result_pipe_id			Pointer to result pipe id
1967  *
1968  * Returns:	0				Success
1969  *		EIVAL				NULL pointer arguments passed
1970  *		fp_lookup:EBADF			Bad file descriptor
1971  *		ENOTSUP				fd does not refer to a pipe
1972  *
1973  * Implicit returns:
1974  *		*result_pipe_id (modified)	pipe id
1975  *
1976  * Locks:	Internally takes and releases proc_fdlock
1977  */
1978 int
fp_get_pipe_id(proc_t p,int fd,uint64_t * result_pipe_id)1979 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1980 {
1981 	struct fileproc *fp = FILEPROC_NULL;
1982 	struct fileglob *fg = NULL;
1983 	int error = 0;
1984 
1985 	if (p == NULL || result_pipe_id == NULL) {
1986 		return EINVAL;
1987 	}
1988 
1989 	proc_fdlock(p);
1990 	if ((error = fp_lookup(p, fd, &fp, 1))) {
1991 		proc_fdunlock(p);
1992 		return error;
1993 	}
1994 	fg = fp->fp_glob;
1995 
1996 	if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
1997 		*result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
1998 	} else {
1999 		error = ENOTSUP;
2000 	}
2001 
2002 	fp_drop(p, fd, fp, 1);
2003 	proc_fdunlock(p);
2004 	return error;
2005 }
2006 
2007 
2008 /*
2009  * file_vnode
2010  *
2011  * Description:	Given an fd, look it up in the current process's per process
2012  *		open file table, and return its internal vnode pointer.
2013  *
2014  * Parameters:	fd				fd to obtain vnode from
2015  *		vpp				pointer to vnode return area
2016  *
2017  * Returns:	0				Success
2018  *		EINVAL				The fd does not refer to a
2019  *						vnode fileproc entry
2020  *	fp_lookup:EBADF				Bad file descriptor
2021  *
2022  * Implicit returns:
2023  *		*vpp (modified)			Returned vnode pointer
2024  *
2025  * Locks:	This function internally takes and drops the proc_fdlock for
2026  *		the current process
2027  *
2028  * Notes:	If successful, this function increments the fp_iocount on the
2029  *		fd's corresponding fileproc.
2030  *
2031  *		The fileproc referenced is not returned; because of this, care
2032  *		must be taken to not drop the last reference (e.g. by closing
2033  *		the file).  This is inherently unsafe, since the reference may
2034  *		not be recoverable from the vnode, if there is a subsequent
2035  *		close that destroys the associate fileproc.  The caller should
2036  *		therefore retain their own reference on the fileproc so that
2037  *		the fp_iocount can be dropped subsequently.  Failure to do this
2038  *		can result in the returned pointer immediately becoming invalid
2039  *		following the call.
2040  *
2041  *		Use of this function is discouraged.
2042  */
2043 int
file_vnode(int fd,struct vnode ** vpp)2044 file_vnode(int fd, struct vnode **vpp)
2045 {
2046 	return file_vnode_withvid(fd, vpp, NULL);
2047 }
2048 
2049 
2050 /*
2051  * file_vnode_withvid
2052  *
2053  * Description:	Given an fd, look it up in the current process's per process
2054  *		open file table, and return its internal vnode pointer.
2055  *
2056  * Parameters:	fd				fd to obtain vnode from
2057  *		vpp				pointer to vnode return area
2058  *		vidp				pointer to vid of the returned vnode
2059  *
2060  * Returns:	0				Success
2061  *		EINVAL				The fd does not refer to a
2062  *						vnode fileproc entry
2063  *	fp_lookup:EBADF				Bad file descriptor
2064  *
2065  * Implicit returns:
2066  *		*vpp (modified)			Returned vnode pointer
2067  *
2068  * Locks:	This function internally takes and drops the proc_fdlock for
2069  *		the current process
2070  *
2071  * Notes:	If successful, this function increments the fp_iocount on the
2072  *		fd's corresponding fileproc.
2073  *
2074  *		The fileproc referenced is not returned; because of this, care
2075  *		must be taken to not drop the last reference (e.g. by closing
2076  *		the file).  This is inherently unsafe, since the reference may
2077  *		not be recoverable from the vnode, if there is a subsequent
2078  *		close that destroys the associate fileproc.  The caller should
2079  *		therefore retain their own reference on the fileproc so that
2080  *		the fp_iocount can be dropped subsequently.  Failure to do this
2081  *		can result in the returned pointer immediately becoming invalid
2082  *		following the call.
2083  *
2084  *		Use of this function is discouraged.
2085  */
2086 int
file_vnode_withvid(int fd,struct vnode ** vpp,uint32_t * vidp)2087 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2088 {
2089 	struct fileproc *fp;
2090 	int error;
2091 
2092 	error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2093 	if (error == 0) {
2094 		if (vpp) {
2095 			*vpp = (struct vnode *)fp_get_data(fp);
2096 		}
2097 		if (vidp) {
2098 			*vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2099 		}
2100 	}
2101 	return error;
2102 }
2103 
2104 /*
2105  * file_socket
2106  *
2107  * Description:	Given an fd, look it up in the current process's per process
2108  *		open file table, and return its internal socket pointer.
2109  *
2110  * Parameters:	fd				fd to obtain vnode from
2111  *		sp				pointer to socket return area
2112  *
2113  * Returns:	0				Success
2114  *		ENOTSOCK			Not a socket
2115  *		fp_lookup:EBADF			Bad file descriptor
2116  *
2117  * Implicit returns:
2118  *		*sp (modified)			Returned socket pointer
2119  *
2120  * Locks:	This function internally takes and drops the proc_fdlock for
2121  *		the current process
2122  *
2123  * Notes:	If successful, this function increments the fp_iocount on the
2124  *		fd's corresponding fileproc.
2125  *
2126  *		The fileproc referenced is not returned; because of this, care
2127  *		must be taken to not drop the last reference (e.g. by closing
2128  *		the file).  This is inherently unsafe, since the reference may
2129  *		not be recoverable from the socket, if there is a subsequent
2130  *		close that destroys the associate fileproc.  The caller should
2131  *		therefore retain their own reference on the fileproc so that
2132  *		the fp_iocount can be dropped subsequently.  Failure to do this
2133  *		can result in the returned pointer immediately becoming invalid
2134  *		following the call.
2135  *
2136  *		Use of this function is discouraged.
2137  */
2138 int
file_socket(int fd,struct socket ** sp)2139 file_socket(int fd, struct socket **sp)
2140 {
2141 	struct fileproc *fp;
2142 	int error;
2143 
2144 	error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2145 	if (error == 0) {
2146 		if (sp) {
2147 			*sp = (struct socket *)fp_get_data(fp);
2148 		}
2149 	}
2150 	return error;
2151 }
2152 
2153 
2154 /*
2155  * file_flags
2156  *
2157  * Description:	Given an fd, look it up in the current process's per process
2158  *		open file table, and return its fileproc's flags field.
2159  *
2160  * Parameters:	fd				fd whose flags are to be
2161  *						retrieved
2162  *		flags				pointer to flags data area
2163  *
2164  * Returns:	0				Success
2165  *		ENOTSOCK			Not a socket
2166  *		fp_lookup:EBADF			Bad file descriptor
2167  *
2168  * Implicit returns:
2169  *		*flags (modified)		Returned flags field
2170  *
2171  * Locks:	This function internally takes and drops the proc_fdlock for
2172  *		the current process
2173  */
2174 int
file_flags(int fd,int * flags)2175 file_flags(int fd, int *flags)
2176 {
2177 	proc_t p = current_proc();
2178 	struct fileproc *fp;
2179 	int error = EBADF;
2180 
2181 	proc_fdlock_spin(p);
2182 	fp = fp_get_noref_locked(p, fd);
2183 	if (fp) {
2184 		*flags = (int)fp->f_flag;
2185 		error = 0;
2186 	}
2187 	proc_fdunlock(p);
2188 
2189 	return error;
2190 }
2191 
2192 
2193 /*
2194  * file_drop
2195  *
2196  * Description:	Drop an iocount reference on an fd, and wake up any waiters
2197  *		for draining (i.e. blocked in fileproc_drain() called during
2198  *		the last attempt to close a file).
2199  *
2200  * Parameters:	fd				fd on which an ioreference is
2201  *						to be dropped
2202  *
2203  * Returns:	0				Success
2204  *
2205  * Description:	Given an fd, look it up in the current process's per process
2206  *		open file table, and drop it's fileproc's fp_iocount by one
2207  *
2208  * Notes:	This is intended as a corresponding operation to the functions
2209  *		file_vnode() and file_socket() operations.
2210  *
2211  *		If the caller can't possibly hold an I/O reference,
2212  *		this function will panic the kernel rather than allowing
2213  *		for memory corruption. Callers should always call this
2214  *		because they acquired an I/O reference on this file before.
2215  *
2216  *		Use of this function is discouraged.
2217  */
2218 int
file_drop(int fd)2219 file_drop(int fd)
2220 {
2221 	struct fileproc *fp;
2222 	proc_t p = current_proc();
2223 	struct filedesc *fdp = &p->p_fd;
2224 	int     needwakeup = 0;
2225 
2226 	proc_fdlock_spin(p);
2227 	fp = fp_get_noref_locked_with_iocount(p, fd);
2228 
2229 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
2230 		if (fp->fp_flags & FP_SELCONFLICT) {
2231 			fp->fp_flags &= ~FP_SELCONFLICT;
2232 		}
2233 
2234 		if (fdp->fd_fpdrainwait) {
2235 			fdp->fd_fpdrainwait = 0;
2236 			needwakeup = 1;
2237 		}
2238 	}
2239 	proc_fdunlock(p);
2240 
2241 	if (needwakeup) {
2242 		wakeup(&fdp->fd_fpdrainwait);
2243 	}
2244 	return 0;
2245 }
2246 
2247 
2248 #pragma mark syscalls
2249 
/*
 * Fallback definitions: each is supplied here only when no previously
 * included header has already defined it.
 */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
#endif

/*
 * Evaluates to 1 if adding the signed 64-bit values x and y would leave the
 * range [LLONG_MIN, LLONG_MAX], and to 0 otherwise.  The sum itself is never
 * computed.  Both arguments are evaluated more than once, so they must be
 * free of side effects.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
	(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y))))
2266 
2267 /*
2268  * sys_getdtablesize
2269  *
2270  * Description:	Returns the per process maximum size of the descriptor table
2271  *
2272  * Parameters:	p				Process being queried
2273  *		retval				Pointer to the call return area
2274  *
2275  * Returns:	0				Success
2276  *
2277  * Implicit returns:
2278  *		*retval (modified)		Size of dtable
2279  */
2280 int
sys_getdtablesize(proc_t p,__unused struct getdtablesize_args * uap,int32_t * retval)2281 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2282 {
2283 	*retval = proc_limitgetcur_nofile(p);
2284 	return 0;
2285 }
2286 
2287 
2288 /*
2289  * check_file_seek_range
2290  *
2291  * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
2292  *
2293  * Parameters:  fl		Flock structure.
2294  *		cur_file_offset	Current offset in the file.
2295  *
2296  * Returns:     0               on Success.
2297  *		EOVERFLOW	on overflow.
2298  *		EINVAL          on offset less than zero.
2299  */
2300 
2301 static int
check_file_seek_range(struct flock * fl,off_t cur_file_offset)2302 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
2303 {
2304 	if (fl->l_whence == SEEK_CUR) {
2305 		/* Check if the start marker is beyond LLONG_MAX. */
2306 		if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
2307 			/* Check if start marker is negative */
2308 			if (fl->l_start < 0) {
2309 				return EINVAL;
2310 			}
2311 			return EOVERFLOW;
2312 		}
2313 		/* Check if the start marker is negative. */
2314 		if (fl->l_start + cur_file_offset < 0) {
2315 			return EINVAL;
2316 		}
2317 		/* Check if end marker is beyond LLONG_MAX. */
2318 		if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
2319 		    cur_file_offset, fl->l_len - 1))) {
2320 			return EOVERFLOW;
2321 		}
2322 		/* Check if the end marker is negative. */
2323 		if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
2324 		    fl->l_len < 0)) {
2325 			return EINVAL;
2326 		}
2327 	} else if (fl->l_whence == SEEK_SET) {
2328 		/* Check if the start marker is negative. */
2329 		if (fl->l_start < 0) {
2330 			return EINVAL;
2331 		}
2332 		/* Check if the end marker is beyond LLONG_MAX. */
2333 		if ((fl->l_len > 0) &&
2334 		    CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
2335 			return EOVERFLOW;
2336 		}
2337 		/* Check if the end marker is negative. */
2338 		if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
2339 			return EINVAL;
2340 		}
2341 	}
2342 	return 0;
2343 }
2344 
2345 
2346 /*
2347  * sys_dup
2348  *
2349  * Description:	Duplicate a file descriptor.
2350  *
2351  * Parameters:	p				Process performing the dup
2352  *		uap->fd				The fd to dup
2353  *		retval				Pointer to the call return area
2354  *
2355  * Returns:	0				Success
2356  *		!0				Errno
2357  *
2358  * Implicit returns:
2359  *		*retval (modified)		The new descriptor
2360  */
2361 int
sys_dup(proc_t p,struct dup_args * uap,int32_t * retval)2362 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
2363 {
2364 	struct filedesc *fdp = &p->p_fd;
2365 	int old = uap->fd;
2366 	int new, error;
2367 	struct fileproc *fp;
2368 
2369 	proc_fdlock(p);
2370 	if ((error = fp_lookup(p, old, &fp, 1))) {
2371 		proc_fdunlock(p);
2372 		return error;
2373 	}
2374 	if (fp_isguarded(fp, GUARD_DUP)) {
2375 		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2376 		(void) fp_drop(p, old, fp, 1);
2377 		proc_fdunlock(p);
2378 		return error;
2379 	}
2380 	if ((error = fdalloc(p, 0, &new))) {
2381 		fp_drop(p, old, fp, 1);
2382 		proc_fdunlock(p);
2383 		return error;
2384 	}
2385 	error = finishdup(p, fdp, old, new, 0, retval);
2386 
2387 	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
2388 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
2389 		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
2390 	}
2391 
2392 	fp_drop(p, old, fp, 1);
2393 	proc_fdunlock(p);
2394 
2395 	return error;
2396 }
2397 
2398 /*
2399  * sys_dup2
2400  *
2401  * Description:	Duplicate a file descriptor to a particular value.
2402  *
2403  * Parameters:	p				Process performing the dup
2404  *		uap->from			The fd to dup
2405  *		uap->to				The fd to dup it to
2406  *		retval				Pointer to the call return area
2407  *
2408  * Returns:	0				Success
2409  *		!0				Errno
2410  *
2411  * Implicit returns:
2412  *		*retval (modified)		The new descriptor
2413  */
2414 int
sys_dup2(proc_t p,struct dup2_args * uap,int32_t * retval)2415 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2416 {
2417 	return dup2(p, uap->from, uap->to, retval);
2418 }
2419 
2420 int
dup2(proc_t p,int old,int new,int * retval)2421 dup2(proc_t p, int old, int new, int *retval)
2422 {
2423 	struct filedesc *fdp = &p->p_fd;
2424 	struct fileproc *fp, *nfp;
2425 	int i, error;
2426 
2427 	proc_fdlock(p);
2428 
2429 startover:
2430 	if ((error = fp_lookup(p, old, &fp, 1))) {
2431 		proc_fdunlock(p);
2432 		return error;
2433 	}
2434 	if (fp_isguarded(fp, GUARD_DUP)) {
2435 		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2436 		(void) fp_drop(p, old, fp, 1);
2437 		proc_fdunlock(p);
2438 		return error;
2439 	}
2440 	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
2441 		fp_drop(p, old, fp, 1);
2442 		proc_fdunlock(p);
2443 		return EBADF;
2444 	}
2445 	if (old == new) {
2446 		fp_drop(p, old, fp, 1);
2447 		*retval = new;
2448 		proc_fdunlock(p);
2449 		return 0;
2450 	}
2451 	if (new < 0 || new >= fdp->fd_nfiles) {
2452 		if ((error = fdalloc(p, new, &i))) {
2453 			fp_drop(p, old, fp, 1);
2454 			proc_fdunlock(p);
2455 			return error;
2456 		}
2457 		if (new != i) {
2458 			fdrelse(p, i);
2459 			goto closeit;
2460 		}
2461 	} else {
2462 closeit:
2463 		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
2464 			fp_drop(p, old, fp, 1);
2465 			procfdtbl_waitfd(p, new);
2466 #if DIAGNOSTIC
2467 			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2468 #endif
2469 			goto startover;
2470 		}
2471 
2472 		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
2473 			if (fp_isguarded(nfp, GUARD_CLOSE)) {
2474 				fp_drop(p, old, fp, 1);
2475 				error = fp_guard_exception(p,
2476 				    new, nfp, kGUARD_EXC_CLOSE);
2477 				proc_fdunlock(p);
2478 				return error;
2479 			}
2480 			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
2481 			proc_fdlock(p);
2482 			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
2483 		} else {
2484 #if DIAGNOSTIC
2485 			if (fdp->fd_ofiles[new] != NULL) {
2486 				panic("dup2: no ref on fileproc %d", new);
2487 			}
2488 #endif
2489 			procfdtbl_reservefd(p, new);
2490 		}
2491 	}
2492 #if DIAGNOSTIC
2493 	if (fdp->fd_ofiles[new] != 0) {
2494 		panic("dup2: overwriting fd_ofiles with new %d", new);
2495 	}
2496 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
2497 		panic("dup2: unreserved fileflags with new %d", new);
2498 	}
2499 #endif
2500 	error = finishdup(p, fdp, old, new, 0, retval);
2501 	fp_drop(p, old, fp, 1);
2502 	proc_fdunlock(p);
2503 
2504 	return error;
2505 }
2506 
2507 
2508 /*
2509  * fcntl
2510  *
2511  * Description:	The file control system call.
2512  *
2513  * Parameters:	p				Process performing the fcntl
2514  *		uap->fd				The fd to operate against
2515  *		uap->cmd			The command to perform
2516  *		uap->arg			Pointer to the command argument
2517  *		retval				Pointer to the call return area
2518  *
2519  * Returns:	0				Success
2520  *		!0				Errno (see fcntl_nocancel)
2521  *
2522  * Implicit returns:
2523  *		*retval (modified)		fcntl return value (if any)
2524  *
2525  * Notes:	This system call differs from fcntl_nocancel() in that it
2526  *		tests for cancellation prior to performing a potentially
2527  *		blocking operation.
2528  */
2529 int
sys_fcntl(proc_t p,struct fcntl_args * uap,int32_t * retval)2530 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
2531 {
2532 	__pthread_testcancel(1);
2533 	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
2534 }
2535 
/* Entitlement name passed to IOCurrentTaskHasEntitlement() by the OPENFROM handler. */
#define ACCOUNT_OPENFROM_ENTITLEMENT    "com.apple.private.vfs.role-account-openfrom"
2538 
2539 /*
2540  * sys_fcntl_nocancel
2541  *
2542  * Description:	A non-cancel-testing file control system call.
2543  *
2544  * Parameters:	p				Process performing the fcntl
2545  *		uap->fd				The fd to operate against
2546  *		uap->cmd			The command to perform
2547  *		uap->arg			Pointer to the command argument
2548  *		retval				Pointer to the call return area
2549  *
2550  * Returns:	0				Success
2551  *		EINVAL
2552  *	fp_lookup:EBADF				Bad file descriptor
2553  * [F_DUPFD]
2554  *	fdalloc:EMFILE
2555  *	fdalloc:ENOMEM
2556  *	finishdup:EBADF
2557  *	finishdup:ENOMEM
2558  * [F_SETOWN]
2559  *		ESRCH
2560  * [F_SETLK]
2561  *		EBADF
2562  *		EOVERFLOW
2563  *	copyin:EFAULT
2564  *	vnode_getwithref:???
2565  *	VNOP_ADVLOCK:???
2566  *	msleep:ETIMEDOUT
2567  * [F_GETLK]
2568  *		EBADF
2569  *		EOVERFLOW
2570  *	copyin:EFAULT
2571  *	copyout:EFAULT
2572  *	vnode_getwithref:???
2573  *	VNOP_ADVLOCK:???
2574  * [F_PREALLOCATE]
2575  *		EBADF
2576  *		EFBIG
2577  *		EINVAL
2578  *		ENOSPC
2579  *	copyin:EFAULT
2580  *	copyout:EFAULT
2581  *	vnode_getwithref:???
2582  *	VNOP_ALLOCATE:???
2583  * [F_SETSIZE,F_RDADVISE]
2584  *		EBADF
2585  *		EINVAL
2586  *	copyin:EFAULT
2587  *	vnode_getwithref:???
2588  * [F_RDAHEAD,F_NOCACHE]
2589  *		EBADF
2590  *	vnode_getwithref:???
2591  * [???]
2592  *
2593  * Implicit returns:
2594  *		*retval (modified)		fcntl return value (if any)
2595  */
/*
 * Declares a local 'struct vfs_context' named 'context' that pairs the
 * current thread with the credential stored on the fileproc.  The expansion
 * refers to a variable named 'fp', which must be in scope at the point of
 * use.
 */
#define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
	struct vfs_context context = { .vc_thread = current_thread(), .vc_ucred = fp->f_cred }
2601 
2602 static user_addr_t
sys_fnctl_parse_arg(proc_t p,user_long_t arg)2603 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2604 {
2605 	/*
2606 	 * Since the arg parameter is defined as a long but may be
2607 	 * either a long or a pointer we must take care to handle
2608 	 * sign extension issues.  Our sys call munger will sign
2609 	 * extend a long when we are called from a 32-bit process.
2610 	 * Since we can never have an address greater than 32-bits
2611 	 * from a 32-bit process we lop off the top 32-bits to avoid
2612 	 * getting the wrong address
2613 	 */
2614 	return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2615 }
2616 
2617 /* cleanup code common to fnctl functions, for when the fdlock is still held */
2618 static int
sys_fcntl_out(proc_t p,int fd,struct fileproc * fp,int error)2619 sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
2620 {
2621 	fp_drop(p, fd, fp, 1);
2622 	proc_fdunlock(p);
2623 	return error;
2624 }
2625 
2626 /* cleanup code common to fnctl acting on vnodes, once they unlocked the fdlock */
2627 static int
sys_fcntl_outdrop(proc_t p,int fd,struct fileproc * fp,struct vnode * vp,int error)2628 sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
2629 {
2630 #pragma unused(vp)
2631 
2632 	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2633 	fp_drop(p, fd, fp, 0);
2634 	return error;
2635 }
2636 
2637 typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
2638     struct fileproc *fp, int32_t *retval);
2639 
2640 typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
2641     user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2642 
2643 /*
2644  * SPI (private) for opening a file starting from a dir fd
2645  *
2646  * Note: do not inline to keep stack usage under control.
2647  */
2648 __attribute__((noinline))
2649 static int
sys_fcntl__OPENFROM(proc_t p,int fd,int cmd,user_long_t arg,struct fileproc * fp,struct vnode * vp,int32_t * retval)2650 sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
2651     struct fileproc *fp, struct vnode *vp, int32_t *retval)
2652 {
2653 #pragma unused(cmd)
2654 
2655 	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
2656 	struct user_fopenfrom fopen;
2657 	struct vnode_attr *va;
2658 	struct nameidata *nd;
2659 	int error, cmode;
2660 	bool has_entitlement;
2661 
2662 	/* Check if this isn't a valid file descriptor */
2663 	if ((fp->f_flag & FREAD) == 0) {
2664 		return sys_fcntl_out(p, fd, fp, EBADF);
2665 	}
2666 	proc_fdunlock(p);
2667 
2668 	if (vnode_getwithref(vp)) {
2669 		error = ENOENT;
2670 		goto outdrop;
2671 	}
2672 
2673 	/* Only valid for directories */
2674 	if (vp->v_type != VDIR) {
2675 		vnode_put(vp);
2676 		error = ENOTDIR;
2677 		goto outdrop;
2678 	}
2679 
2680 	/*
2681 	 * Only entitled apps may use the credentials of the thread
2682 	 * that opened the file descriptor.
2683 	 * Non-entitled threads will use their own context.
2684 	 */
2685 	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);
2686 
2687 	/* Get flags, mode and pathname arguments. */
2688 	if (IS_64BIT_PROCESS(p)) {
2689 		error = copyin(argp, &fopen, sizeof(fopen));
2690 	} else {
2691 		struct user32_fopenfrom fopen32;
2692 
2693 		error = copyin(argp, &fopen32, sizeof(fopen32));
2694 		fopen.o_flags = fopen32.o_flags;
2695 		fopen.o_mode = fopen32.o_mode;
2696 		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2697 	}
2698 	if (error) {
2699 		vnode_put(vp);
2700 		goto outdrop;
2701 	}
2702 
2703 	/* open1() can have really deep stacks, so allocate those */
2704 	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2705 	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2706 
2707 	AUDIT_ARG(fflags, fopen.o_flags);
2708 	AUDIT_ARG(mode, fopen.o_mode);
2709 	VATTR_INIT(va);
2710 	/* Mask off all but regular access permissions */
2711 	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
2712 	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);
2713 
2714 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2715 
2716 	/* Start the lookup relative to the file descriptor's vnode. */
2717 	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2718 	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2719 	nd->ni_dvp = vp;
2720 
2721 	error = open1(has_entitlement ? &context : vfs_context_current(),
2722 	    nd, fopen.o_flags, va, NULL, NULL, retval, AUTH_OPEN_NOAUTHFD);
2723 
2724 	kfree_type(struct vnode_attr, va);
2725 	kfree_type(struct nameidata, nd);
2726 
2727 	vnode_put(vp);
2728 
2729 outdrop:
2730 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
2731 }
2732 
2733 int
sys_fcntl_nocancel(proc_t p,struct fcntl_nocancel_args * uap,int32_t * retval)2734 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2735 {
2736 	int fd = uap->fd;
2737 	int cmd = uap->cmd;
2738 	struct filedesc *fdp = &p->p_fd;
2739 	struct fileproc *fp;
2740 	struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
2741 	unsigned int oflags, nflags;
2742 	int i, tmp, error, error2, flg = 0;
2743 	struct flock fl = {};
2744 	struct flocktimeout fltimeout;
2745 	struct timespec *timeout = NULL;
2746 	off_t offset;
2747 	int newmin;
2748 	daddr64_t lbn, bn;
2749 	unsigned int fflag;
2750 	user_addr_t argp;
2751 	boolean_t is64bit;
2752 	int has_entitlement = 0;
2753 
2754 	AUDIT_ARG(fd, uap->fd);
2755 	AUDIT_ARG(cmd, uap->cmd);
2756 
2757 	proc_fdlock(p);
2758 	if ((error = fp_lookup(p, fd, &fp, 1))) {
2759 		proc_fdunlock(p);
2760 		return error;
2761 	}
2762 
2763 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2764 
2765 	is64bit = proc_is64bit(p);
2766 	if (is64bit) {
2767 		argp = uap->arg;
2768 	} else {
2769 		/*
2770 		 * Since the arg parameter is defined as a long but may be
2771 		 * either a long or a pointer we must take care to handle
2772 		 * sign extension issues.  Our sys call munger will sign
2773 		 * extend a long when we are called from a 32-bit process.
2774 		 * Since we can never have an address greater than 32-bits
2775 		 * from a 32-bit process we lop off the top 32-bits to avoid
2776 		 * getting the wrong address
2777 		 */
2778 		argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2779 	}
2780 
2781 #if CONFIG_MACF
2782 	error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2783 	if (error) {
2784 		goto out;
2785 	}
2786 #endif
2787 
2788 	switch (cmd) {
2789 	case F_DUPFD:
2790 	case F_DUPFD_CLOEXEC:
2791 		if (fp_isguarded(fp, GUARD_DUP)) {
2792 			error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2793 			goto out;
2794 		}
2795 		newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2796 		AUDIT_ARG(value32, newmin);
2797 		if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2798 			error = EINVAL;
2799 			goto out;
2800 		}
2801 		if ((error = fdalloc(p, newmin, &i))) {
2802 			goto out;
2803 		}
2804 		error = finishdup(p, fdp, fd, i,
2805 		    cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2806 		goto out;
2807 
2808 	case F_GETFD:
2809 		*retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2810 		error = 0;
2811 		goto out;
2812 
2813 	case F_SETFD:
2814 		AUDIT_ARG(value32, (uint32_t)uap->arg);
2815 		if (uap->arg & FD_CLOEXEC) {
2816 			fp->fp_flags |= FP_CLOEXEC;
2817 			error = 0;
2818 		} else if (!fp->fp_guard_attrs) {
2819 			fp->fp_flags &= ~FP_CLOEXEC;
2820 			error = 0;
2821 		} else {
2822 			error = fp_guard_exception(p,
2823 			    fd, fp, kGUARD_EXC_NOCLOEXEC);
2824 		}
2825 		goto out;
2826 
2827 	case F_GETFL:
2828 		fflag = fp->f_flag;
2829 		if ((fflag & O_EVTONLY) && proc_disallow_rw_for_o_evtonly(p)) {
2830 			/*
2831 			 * We insert back F_READ so that conversion back to open flags with
2832 			 * OFLAGS() will come out right. We only need to set 'FREAD' as the
2833 			 * 'O_RDONLY' is always implied.
2834 			 */
2835 			fflag |= FREAD;
2836 		}
2837 		*retval = OFLAGS(fflag);
2838 		error = 0;
2839 		goto out;
2840 
2841 	case F_SETFL:
2842 		// FIXME (rdar://54898652)
2843 		//
2844 		// this code is broken if fnctl(F_SETFL), ioctl() are
2845 		// called concurrently for the same fileglob.
2846 
2847 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2848 		AUDIT_ARG(value32, tmp);
2849 
2850 		os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2851 			nflags  = oflags & ~FCNTLFLAGS;
2852 			nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2853 		});
2854 		tmp = nflags & FNONBLOCK;
2855 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2856 		if (error) {
2857 			goto out;
2858 		}
2859 		tmp = nflags & FASYNC;
2860 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2861 		if (!error) {
2862 			goto out;
2863 		}
2864 		os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2865 		tmp = 0;
2866 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2867 		goto out;
2868 
2869 	case F_GETOWN:
2870 		if (fp->f_type == DTYPE_SOCKET) {
2871 			*retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2872 			error = 0;
2873 			goto out;
2874 		}
2875 		error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2876 		*retval = -*retval;
2877 		goto out;
2878 
2879 	case F_SETOWN:
2880 		tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2881 		AUDIT_ARG(value32, tmp);
2882 		if (fp->f_type == DTYPE_SOCKET) {
2883 			((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2884 			error = 0;
2885 			goto out;
2886 		}
2887 		if (fp->f_type == DTYPE_PIPE) {
2888 			error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2889 			goto out;
2890 		}
2891 
2892 		if (tmp <= 0) {
2893 			tmp = -tmp;
2894 		} else {
2895 			proc_t p1 = proc_find(tmp);
2896 			if (p1 == 0) {
2897 				error = ESRCH;
2898 				goto out;
2899 			}
2900 			tmp = (int)p1->p_pgrpid;
2901 			proc_rele(p1);
2902 		}
2903 		error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2904 		goto out;
2905 
2906 	case F_SETNOSIGPIPE:
2907 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2908 		if (fp->f_type == DTYPE_SOCKET) {
2909 #if SOCKETS
2910 			error = sock_setsockopt((struct socket *)fp_get_data(fp),
2911 			    SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2912 #else
2913 			error = EINVAL;
2914 #endif
2915 		} else {
2916 			struct fileglob *fg = fp->fp_glob;
2917 
2918 			lck_mtx_lock_spin(&fg->fg_lock);
2919 			if (tmp) {
2920 				fg->fg_lflags |= FG_NOSIGPIPE;
2921 			} else {
2922 				fg->fg_lflags &= ~FG_NOSIGPIPE;
2923 			}
2924 			lck_mtx_unlock(&fg->fg_lock);
2925 			error = 0;
2926 		}
2927 		goto out;
2928 
2929 	case F_GETNOSIGPIPE:
2930 		if (fp->f_type == DTYPE_SOCKET) {
2931 #if SOCKETS
2932 			int retsize = sizeof(*retval);
2933 			error = sock_getsockopt((struct socket *)fp_get_data(fp),
2934 			    SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2935 #else
2936 			error = EINVAL;
2937 #endif
2938 		} else {
2939 			*retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2940 			    1 : 0;
2941 			error = 0;
2942 		}
2943 		goto out;
2944 
2945 	case F_SETCONFINED:
2946 		/*
2947 		 * If this is the only reference to this fglob in the process
2948 		 * and it's already marked as close-on-fork then mark it as
2949 		 * (immutably) "confined" i.e. any fd that points to it will
2950 		 * forever be close-on-fork, and attempts to use an IPC
2951 		 * mechanism to move the descriptor elsewhere will fail.
2952 		 */
2953 		if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2954 			struct fileglob *fg = fp->fp_glob;
2955 
2956 			lck_mtx_lock_spin(&fg->fg_lock);
2957 			if (fg->fg_lflags & FG_CONFINED) {
2958 				error = 0;
2959 			} else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2960 				error = EAGAIN; /* go close the dup .. */
2961 			} else if (fp->fp_flags & FP_CLOFORK) {
2962 				fg->fg_lflags |= FG_CONFINED;
2963 				error = 0;
2964 			} else {
2965 				error = EBADF;  /* open without O_CLOFORK? */
2966 			}
2967 			lck_mtx_unlock(&fg->fg_lock);
2968 		} else {
2969 			/*
2970 			 * Other subsystems may have built on the immutability
2971 			 * of FG_CONFINED; clearing it may be tricky.
2972 			 */
2973 			error = EPERM;          /* immutable */
2974 		}
2975 		goto out;
2976 
2977 	case F_GETCONFINED:
2978 		*retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2979 		error = 0;
2980 		goto out;
2981 
2982 	case F_SETLKWTIMEOUT:
2983 	case F_SETLKW:
2984 	case F_OFD_SETLKWTIMEOUT:
2985 	case F_OFD_SETLKW:
2986 		flg |= F_WAIT;
2987 		OS_FALLTHROUGH;
2988 
2989 	case F_SETLK:
2990 	case F_OFD_SETLK:
2991 		if (fp->f_type != DTYPE_VNODE) {
2992 			error = EBADF;
2993 			goto out;
2994 		}
2995 		vp = (struct vnode *)fp_get_data(fp);
2996 
2997 		fflag = fp->f_flag;
2998 		offset = fp->f_offset;
2999 		proc_fdunlock(p);
3000 
3001 		/* Copy in the lock structure */
3002 		if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3003 			error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3004 			if (error) {
3005 				goto outdrop;
3006 			}
3007 			fl = fltimeout.fl;
3008 			timeout = &fltimeout.timeout;
3009 		} else {
3010 			error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3011 			if (error) {
3012 				goto outdrop;
3013 			}
3014 		}
3015 
3016 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3017 		/* and ending byte for EOVERFLOW in SEEK_SET */
3018 		error = check_file_seek_range(&fl, offset);
3019 		if (error) {
3020 			goto outdrop;
3021 		}
3022 
3023 		if ((error = vnode_getwithref(vp))) {
3024 			goto outdrop;
3025 		}
3026 		if (fl.l_whence == SEEK_CUR) {
3027 			fl.l_start += offset;
3028 		}
3029 
3030 #if CONFIG_MACF
3031 		error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3032 		    F_SETLK, &fl);
3033 		if (error) {
3034 			(void)vnode_put(vp);
3035 			goto outdrop;
3036 		}
3037 #endif
3038 
3039 #if CONFIG_FILE_LEASES
3040 		(void)vnode_breaklease(vp, O_WRONLY, vfs_context_current());
3041 #endif
3042 
3043 		switch (cmd) {
3044 		case F_OFD_SETLK:
3045 		case F_OFD_SETLKW:
3046 		case F_OFD_SETLKWTIMEOUT:
3047 			flg |= F_OFD_LOCK;
3048 			if (fp->fp_glob->fg_lflags & FG_CONFINED) {
3049 				flg |= F_CONFINED;
3050 			}
3051 			switch (fl.l_type) {
3052 			case F_RDLCK:
3053 				if ((fflag & FREAD) == 0) {
3054 					error = EBADF;
3055 					break;
3056 				}
3057 				error = VNOP_ADVLOCK(vp, ofd_to_id(fp->fp_glob),
3058 				    F_SETLK, &fl, flg, &context, timeout);
3059 				break;
3060 			case F_WRLCK:
3061 				if ((fflag & FWRITE) == 0) {
3062 					error = EBADF;
3063 					break;
3064 				}
3065 				error = VNOP_ADVLOCK(vp, ofd_to_id(fp->fp_glob),
3066 				    F_SETLK, &fl, flg, &context, timeout);
3067 				break;
3068 			case F_UNLCK:
3069 				error = VNOP_ADVLOCK(vp, ofd_to_id(fp->fp_glob),
3070 				    F_UNLCK, &fl, F_OFD_LOCK, &context,
3071 				    timeout);
3072 				break;
3073 			default:
3074 				error = EINVAL;
3075 				break;
3076 			}
3077 			if (0 == error &&
3078 			    (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3079 				struct fileglob *fg = fp->fp_glob;
3080 
3081 				/*
3082 				 * arrange F_UNLCK on last close (once
3083 				 * set, FG_HAS_OFDLOCK is immutable)
3084 				 */
3085 				if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3086 					lck_mtx_lock_spin(&fg->fg_lock);
3087 					fg->fg_lflags |= FG_HAS_OFDLOCK;
3088 					lck_mtx_unlock(&fg->fg_lock);
3089 				}
3090 			}
3091 			break;
3092 		default:
3093 			flg |= F_POSIX;
3094 			switch (fl.l_type) {
3095 			case F_RDLCK:
3096 				if ((fflag & FREAD) == 0) {
3097 					error = EBADF;
3098 					break;
3099 				}
3100 				// XXX UInt32 unsafe for LP64 kernel
3101 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3102 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3103 				    F_SETLK, &fl, flg, &context, timeout);
3104 				break;
3105 			case F_WRLCK:
3106 				if ((fflag & FWRITE) == 0) {
3107 					error = EBADF;
3108 					break;
3109 				}
3110 				// XXX UInt32 unsafe for LP64 kernel
3111 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3112 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3113 				    F_SETLK, &fl, flg, &context, timeout);
3114 				break;
3115 			case F_UNLCK:
3116 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3117 				    F_UNLCK, &fl, F_POSIX, &context, timeout);
3118 				break;
3119 			default:
3120 				error = EINVAL;
3121 				break;
3122 			}
3123 			break;
3124 		}
3125 		(void) vnode_put(vp);
3126 		goto outdrop;
3127 
3128 	case F_GETLK:
3129 	case F_OFD_GETLK:
3130 	case F_GETLKPID:
3131 	case F_OFD_GETLKPID:
3132 		if (fp->f_type != DTYPE_VNODE) {
3133 			error = EBADF;
3134 			goto out;
3135 		}
3136 		vp = (struct vnode *)fp_get_data(fp);
3137 
3138 		offset = fp->f_offset;
3139 		proc_fdunlock(p);
3140 
3141 		/* Copy in the lock structure */
3142 		error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3143 		if (error) {
3144 			goto outdrop;
3145 		}
3146 
3147 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3148 		/* and ending byte for EOVERFLOW in SEEK_SET */
3149 		error = check_file_seek_range(&fl, offset);
3150 		if (error) {
3151 			goto outdrop;
3152 		}
3153 
3154 		if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3155 			error = EINVAL;
3156 			goto outdrop;
3157 		}
3158 
3159 		switch (fl.l_type) {
3160 		case F_RDLCK:
3161 		case F_UNLCK:
3162 		case F_WRLCK:
3163 			break;
3164 		default:
3165 			error = EINVAL;
3166 			goto outdrop;
3167 		}
3168 
3169 		switch (fl.l_whence) {
3170 		case SEEK_CUR:
3171 		case SEEK_SET:
3172 		case SEEK_END:
3173 			break;
3174 		default:
3175 			error = EINVAL;
3176 			goto outdrop;
3177 		}
3178 
3179 		if ((error = vnode_getwithref(vp)) == 0) {
3180 			if (fl.l_whence == SEEK_CUR) {
3181 				fl.l_start += offset;
3182 			}
3183 
3184 #if CONFIG_MACF
3185 			error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3186 			    cmd, &fl);
3187 			if (error == 0)
3188 #endif
3189 			switch (cmd) {
3190 			case F_OFD_GETLK:
3191 				error = VNOP_ADVLOCK(vp, ofd_to_id(fp->fp_glob),
3192 				    F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3193 				break;
3194 			case F_OFD_GETLKPID:
3195 				error = VNOP_ADVLOCK(vp, ofd_to_id(fp->fp_glob),
3196 				    F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3197 				break;
3198 			default:
3199 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3200 				    cmd, &fl, F_POSIX, &context, NULL);
3201 				break;
3202 			}
3203 
3204 			(void)vnode_put(vp);
3205 
3206 			if (error == 0) {
3207 				error = copyout((caddr_t)&fl, argp, sizeof(fl));
3208 			}
3209 		}
3210 		goto outdrop;
3211 
3212 	case F_PREALLOCATE: {
3213 		fstore_t alloc_struct;    /* structure for allocate command */
3214 		u_int32_t alloc_flags = 0;
3215 
3216 		if (fp->f_type != DTYPE_VNODE) {
3217 			error = EBADF;
3218 			goto out;
3219 		}
3220 
3221 		vp = (struct vnode *)fp_get_data(fp);
3222 		proc_fdunlock(p);
3223 
3224 		/* make sure that we have write permission */
3225 		if ((fp->f_flag & FWRITE) == 0) {
3226 			error = EBADF;
3227 			goto outdrop;
3228 		}
3229 
3230 		error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3231 		if (error) {
3232 			goto outdrop;
3233 		}
3234 
3235 		/* now set the space allocated to 0 */
3236 		alloc_struct.fst_bytesalloc = 0;
3237 
3238 		/*
3239 		 * Do some simple parameter checking
3240 		 */
3241 
3242 		/* set up the flags */
3243 
3244 		alloc_flags |= PREALLOCATE;
3245 
3246 		if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3247 			alloc_flags |= ALLOCATECONTIG;
3248 		}
3249 
3250 		if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3251 			alloc_flags |= ALLOCATEALL;
3252 		}
3253 
3254 		if (alloc_struct.fst_flags & F_ALLOCATEPERSIST) {
3255 			alloc_flags |= ALLOCATEPERSIST;
3256 		}
3257 
3258 		/*
3259 		 * Do any position mode specific stuff.  The supported
3260 		 * position modes are F_PEOFPOSMODE and F_VOLPOSMODE.
3261 		 */
3262 
3263 		switch (alloc_struct.fst_posmode) {
3264 		case F_PEOFPOSMODE:
3265 			if (alloc_struct.fst_offset != 0) {
3266 				error = EINVAL;
3267 				goto outdrop;
3268 			}
3269 
3270 			alloc_flags |= ALLOCATEFROMPEOF;
3271 			break;
3272 
3273 		case F_VOLPOSMODE:
3274 			if (alloc_struct.fst_offset <= 0) {
3275 				error = EINVAL;
3276 				goto outdrop;
3277 			}
3278 
3279 			alloc_flags |= ALLOCATEFROMVOL;
3280 			break;
3281 
3282 		default: {
3283 			error = EINVAL;
3284 			goto outdrop;
3285 		}
3286 		}
3287 		if ((error = vnode_getwithref(vp)) == 0) {
3288 			/*
3289 			 * call allocate to get the space
3290 			 */
3291 			error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3292 			    &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3293 			    &context);
3294 			(void)vnode_put(vp);
3295 
3296 			error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3297 
3298 			if (error == 0) {
3299 				error = error2;
3300 			}
3301 		}
3302 		goto outdrop;
3303 	}
3304 	case F_PUNCHHOLE: {
3305 		fpunchhole_t args;
3306 
3307 		if (fp->f_type != DTYPE_VNODE) {
3308 			error = EBADF;
3309 			goto out;
3310 		}
3311 
3312 		vp = (struct vnode *)fp_get_data(fp);
3313 		proc_fdunlock(p);
3314 
3315 		/* need write permissions */
3316 		if ((fp->f_flag & FWRITE) == 0) {
3317 			error = EPERM;
3318 			goto outdrop;
3319 		}
3320 
3321 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3322 			goto outdrop;
3323 		}
3324 
3325 		if ((error = vnode_getwithref(vp))) {
3326 			goto outdrop;
3327 		}
3328 
3329 #if CONFIG_MACF
3330 		if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
3331 			(void)vnode_put(vp);
3332 			goto outdrop;
3333 		}
3334 #endif
3335 
3336 		error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
3337 		(void)vnode_put(vp);
3338 
3339 		goto outdrop;
3340 	}
3341 	case F_TRIM_ACTIVE_FILE: {
3342 		ftrimactivefile_t args;
3343 
3344 		if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
3345 			error = EACCES;
3346 			goto out;
3347 		}
3348 
3349 		if (fp->f_type != DTYPE_VNODE) {
3350 			error = EBADF;
3351 			goto out;
3352 		}
3353 
3354 		vp = (struct vnode *)fp_get_data(fp);
3355 		proc_fdunlock(p);
3356 
3357 		/* need write permissions */
3358 		if ((fp->f_flag & FWRITE) == 0) {
3359 			error = EPERM;
3360 			goto outdrop;
3361 		}
3362 
3363 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3364 			goto outdrop;
3365 		}
3366 
3367 		if ((error = vnode_getwithref(vp))) {
3368 			goto outdrop;
3369 		}
3370 
3371 		error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
3372 		(void)vnode_put(vp);
3373 
3374 		goto outdrop;
3375 	}
3376 	case F_SPECULATIVE_READ: {
3377 		fspecread_t args;
3378 		off_t temp_length = 0;
3379 
3380 		if (fp->f_type != DTYPE_VNODE) {
3381 			error = EBADF;
3382 			goto out;
3383 		}
3384 
3385 		vp = (struct vnode *)fp_get_data(fp);
3386 		proc_fdunlock(p);
3387 
3388 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3389 			goto outdrop;
3390 		}
3391 
3392 		/* Discard invalid offsets or lengths */
3393 		if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3394 			error = EINVAL;
3395 			goto outdrop;
3396 		}
3397 
3398 		/*
3399 		 * Round the file offset down to a page-size boundary (or to 0).
3400 		 * The filesystem will need to round the length up to the end of the page boundary
3401 		 * or to the EOF of the file.
3402 		 */
3403 		uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3404 		uint64_t foff_delta = args.fsr_offset - foff;
3405 		args.fsr_offset = (off_t) foff;
3406 
3407 		/*
3408 		 * Now add in the delta to the supplied length. Since we may have adjusted the
3409 		 * offset, increase it by the amount that we adjusted.
3410 		 */
3411 		if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3412 			error = EOVERFLOW;
3413 			goto outdrop;
3414 		}
3415 
3416 		/*
3417 		 * Make sure (fsr_offset + fsr_length) does not overflow.
3418 		 */
3419 		if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3420 			error = EOVERFLOW;
3421 			goto outdrop;
3422 		}
3423 
3424 		if ((error = vnode_getwithref(vp))) {
3425 			goto outdrop;
3426 		}
3427 		error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
3428 		(void)vnode_put(vp);
3429 
3430 		goto outdrop;
3431 	}
3432 	case F_ATTRIBUTION_TAG: {
3433 		fattributiontag_t args;
3434 
3435 		if (fp->f_type != DTYPE_VNODE) {
3436 			error = EBADF;
3437 			goto out;
3438 		}
3439 
3440 		vp = (struct vnode *)fp_get_data(fp);
3441 		proc_fdunlock(p);
3442 
3443 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3444 			goto outdrop;
3445 		}
3446 
3447 		if ((error = vnode_getwithref(vp))) {
3448 			goto outdrop;
3449 		}
3450 
3451 		error = VNOP_IOCTL(vp, F_ATTRIBUTION_TAG, (caddr_t)&args, 0, &context);
3452 		(void)vnode_put(vp);
3453 
3454 		if (error == 0) {
3455 			error = copyout((caddr_t)&args, argp, sizeof(args));
3456 		}
3457 
3458 		goto outdrop;
3459 	}
3460 	case F_SETSIZE:
3461 		if (fp->f_type != DTYPE_VNODE) {
3462 			error = EBADF;
3463 			goto out;
3464 		}
3465 		vp = (struct vnode *)fp_get_data(fp);
3466 		proc_fdunlock(p);
3467 
3468 		error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
3469 		if (error) {
3470 			goto outdrop;
3471 		}
3472 		AUDIT_ARG(value64, offset);
3473 
3474 		error = vnode_getwithref(vp);
3475 		if (error) {
3476 			goto outdrop;
3477 		}
3478 
3479 #if CONFIG_MACF
3480 		error = mac_vnode_check_truncate(&context,
3481 		    fp->fp_glob->fg_cred, vp);
3482 		if (error) {
3483 			(void)vnode_put(vp);
3484 			goto outdrop;
3485 		}
3486 #endif
3487 		/*
3488 		 * Make sure that we are root.  Growing a file
3489 		 * without zero filling the data is a security hole.
3490 		 */
3491 		if (!kauth_cred_issuser(kauth_cred_get())) {
3492 			error = EACCES;
3493 		} else {
3494 			/*
3495 			 * Require privilege to change file size without zerofill,
3496 			 * else will change the file size and zerofill it.
3497 			 */
3498 			error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
3499 			if (error == 0) {
3500 				error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
3501 			} else {
3502 				error = vnode_setsize(vp, offset, 0, &context);
3503 			}
3504 
3505 #if CONFIG_MACF
3506 			if (error == 0) {
3507 				mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
3508 			}
3509 #endif
3510 		}
3511 
3512 		(void)vnode_put(vp);
3513 		goto outdrop;
3514 
3515 	case F_RDAHEAD:
3516 		if (fp->f_type != DTYPE_VNODE) {
3517 			error = EBADF;
3518 			goto out;
3519 		}
3520 		if (uap->arg) {
3521 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3522 		} else {
3523 			os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3524 		}
3525 		goto out;
3526 
3527 	case F_NOCACHE:
3528 		if (fp->f_type != DTYPE_VNODE) {
3529 			error = EBADF;
3530 			goto out;
3531 		}
3532 		if (uap->arg) {
3533 			os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3534 		} else {
3535 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3536 		}
3537 		goto out;
3538 
3539 	case F_NODIRECT:
3540 		if (fp->f_type != DTYPE_VNODE) {
3541 			error = EBADF;
3542 			goto out;
3543 		}
3544 		if (uap->arg) {
3545 			os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3546 		} else {
3547 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3548 		}
3549 		goto out;
3550 
3551 	case F_SINGLE_WRITER:
3552 		if (fp->f_type != DTYPE_VNODE) {
3553 			error = EBADF;
3554 			goto out;
3555 		}
3556 		if (uap->arg) {
3557 			os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3558 		} else {
3559 			os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3560 		}
3561 		goto out;
3562 
3563 	case F_GLOBAL_NOCACHE:
3564 		if (fp->f_type != DTYPE_VNODE) {
3565 			error = EBADF;
3566 			goto out;
3567 		}
3568 		vp = (struct vnode *)fp_get_data(fp);
3569 		proc_fdunlock(p);
3570 
3571 		if ((error = vnode_getwithref(vp)) == 0) {
3572 			*retval = vnode_isnocache(vp);
3573 
3574 			if (uap->arg) {
3575 				vnode_setnocache(vp);
3576 			} else {
3577 				vnode_clearnocache(vp);
3578 			}
3579 
3580 			(void)vnode_put(vp);
3581 		}
3582 		goto outdrop;
3583 
3584 	case F_CHECK_OPENEVT:
3585 		if (fp->f_type != DTYPE_VNODE) {
3586 			error = EBADF;
3587 			goto out;
3588 		}
3589 		vp = (struct vnode *)fp_get_data(fp);
3590 		proc_fdunlock(p);
3591 
3592 		if ((error = vnode_getwithref(vp)) == 0) {
3593 			*retval = vnode_is_openevt(vp);
3594 
3595 			if (uap->arg) {
3596 				vnode_set_openevt(vp);
3597 			} else {
3598 				vnode_clear_openevt(vp);
3599 			}
3600 
3601 			(void)vnode_put(vp);
3602 		}
3603 		goto outdrop;
3604 
3605 	case F_RDADVISE: {
3606 		struct radvisory ra_struct;
3607 
3608 		if (fp->f_type != DTYPE_VNODE) {
3609 			error = EBADF;
3610 			goto out;
3611 		}
3612 		vp = (struct vnode *)fp_get_data(fp);
3613 		proc_fdunlock(p);
3614 
3615 		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
3616 			goto outdrop;
3617 		}
3618 		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
3619 			error = EINVAL;
3620 			goto outdrop;
3621 		}
3622 		if ((error = vnode_getwithref(vp)) == 0) {
3623 			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
3624 
3625 			(void)vnode_put(vp);
3626 		}
3627 		goto outdrop;
3628 	}
3629 
3630 	case F_FLUSH_DATA:
3631 
3632 		if (fp->f_type != DTYPE_VNODE) {
3633 			error = EBADF;
3634 			goto out;
3635 		}
3636 		vp = (struct vnode *)fp_get_data(fp);
3637 		proc_fdunlock(p);
3638 
3639 		if ((error = vnode_getwithref(vp)) == 0) {
3640 			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
3641 
3642 			(void)vnode_put(vp);
3643 		}
3644 		goto outdrop;
3645 
3646 	case F_LOG2PHYS:
3647 	case F_LOG2PHYS_EXT: {
3648 		struct log2phys l2p_struct = {};    /* structure for allocate command */
3649 		int devBlockSize;
3650 
3651 		off_t file_offset = 0;
3652 		size_t a_size = 0;
3653 		size_t run = 0;
3654 
3655 		if (cmd == F_LOG2PHYS_EXT) {
3656 			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
3657 			if (error) {
3658 				goto out;
3659 			}
3660 			file_offset = l2p_struct.l2p_devoffset;
3661 		} else {
3662 			file_offset = fp->f_offset;
3663 		}
3664 		if (fp->f_type != DTYPE_VNODE) {
3665 			error = EBADF;
3666 			goto out;
3667 		}
3668 		vp = (struct vnode *)fp_get_data(fp);
3669 		proc_fdunlock(p);
3670 		if ((error = vnode_getwithref(vp))) {
3671 			goto outdrop;
3672 		}
3673 		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
3674 		if (error) {
3675 			(void)vnode_put(vp);
3676 			goto outdrop;
3677 		}
3678 		error = VNOP_BLKTOOFF(vp, lbn, &offset);
3679 		if (error) {
3680 			(void)vnode_put(vp);
3681 			goto outdrop;
3682 		}
3683 		devBlockSize = vfs_devblocksize(vnode_mount(vp));
3684 		if (cmd == F_LOG2PHYS_EXT) {
3685 			if (l2p_struct.l2p_contigbytes < 0) {
3686 				vnode_put(vp);
3687 				error = EINVAL;
3688 				goto outdrop;
3689 			}
3690 
3691 			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
3692 		} else {
3693 			a_size = devBlockSize;
3694 		}
3695 
3696 		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
3697 
3698 		(void)vnode_put(vp);
3699 
3700 		if (!error) {
3701 			l2p_struct.l2p_flags = 0;       /* for now */
3702 			if (cmd == F_LOG2PHYS_EXT) {
3703 				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
3704 			} else {
3705 				l2p_struct.l2p_contigbytes = 0; /* for now */
3706 			}
3707 
3708 			/*
3709 			 * The block number being -1 suggests that the file offset is not backed
3710 			 * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
3711 			 */
3712 			if (bn == -1) {
3713 				/* Don't multiply it by the block size */
3714 				l2p_struct.l2p_devoffset = bn;
3715 			} else {
3716 				l2p_struct.l2p_devoffset = bn * devBlockSize;
3717 				l2p_struct.l2p_devoffset += file_offset - offset;
3718 			}
3719 			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
3720 		}
3721 		goto outdrop;
3722 	}
3723 	case F_GETPATH:
3724 	case F_GETPATH_NOFIRMLINK: {
3725 		char *pathbufp;
3726 		size_t pathlen;
3727 
3728 		if (fp->f_type != DTYPE_VNODE) {
3729 			error = EBADF;
3730 			goto out;
3731 		}
3732 		vp = (struct vnode *)fp_get_data(fp);
3733 		proc_fdunlock(p);
3734 
3735 		pathlen = MAXPATHLEN;
3736 		pathbufp = zalloc(ZV_NAMEI);
3737 
3738 		if ((error = vnode_getwithref(vp)) == 0) {
3739 			error = vn_getpath_ext(vp, NULL, pathbufp,
3740 			    &pathlen, cmd == F_GETPATH_NOFIRMLINK ?
3741 			    VN_GETPATH_NO_FIRMLINK : 0);
3742 			(void)vnode_put(vp);
3743 
3744 			if (error == 0) {
3745 				error = copyout((caddr_t)pathbufp, argp, pathlen);
3746 			}
3747 		}
3748 		zfree(ZV_NAMEI, pathbufp);
3749 		goto outdrop;
3750 	}
3751 
3752 	case F_PATHPKG_CHECK: {
3753 		char *pathbufp;
3754 		size_t pathlen;
3755 
3756 		if (fp->f_type != DTYPE_VNODE) {
3757 			error = EBADF;
3758 			goto out;
3759 		}
3760 		vp = (struct vnode *)fp_get_data(fp);
3761 		proc_fdunlock(p);
3762 
3763 		pathlen = MAXPATHLEN;
3764 		pathbufp = zalloc(ZV_NAMEI);
3765 
3766 		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
3767 			if ((error = vnode_getwithref(vp)) == 0) {
3768 				AUDIT_ARG(text, pathbufp);
3769 				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
3770 
3771 				(void)vnode_put(vp);
3772 			}
3773 		}
3774 		zfree(ZV_NAMEI, pathbufp);
3775 		goto outdrop;
3776 	}
3777 
3778 	case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
3779 	case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
3780 	case F_BARRIERFSYNC:  // fsync + barrier
3781 	case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
3782 	case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
3783 		if (fp->f_type != DTYPE_VNODE) {
3784 			error = EBADF;
3785 			goto out;
3786 		}
3787 		vp = (struct vnode *)fp_get_data(fp);
3788 		proc_fdunlock(p);
3789 
3790 		if ((error = vnode_getwithref(vp)) == 0) {
3791 			if ((cmd == F_BARRIERFSYNC) &&
3792 			    (vp->v_mount->mnt_supl_kern_flag & MNTK_SUPL_USE_FULLSYNC)) {
3793 				cmd = F_FULLFSYNC;
3794 			}
3795 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
3796 
3797 			/*
3798 			 * Promote F_BARRIERFSYNC to F_FULLFSYNC if the underlying
3799 			 * filesystem doesn't support it.
3800 			 */
3801 			if ((error == ENOTTY || error == ENOTSUP || error == EINVAL) &&
3802 			    (cmd == F_BARRIERFSYNC)) {
3803 				os_atomic_or(&vp->v_mount->mnt_supl_kern_flag,
3804 				    MNTK_SUPL_USE_FULLSYNC, relaxed);
3805 
3806 				error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, &context);
3807 			}
3808 
3809 			(void)vnode_put(vp);
3810 		}
3811 		break;
3812 	}
3813 
3814 	/*
3815 	 * SPI (private) for opening a file starting from a dir fd
3816 	 */
3817 	case F_OPENFROM: {
3818 		/* Check if this isn't a valid file descriptor */
3819 		if (fp->f_type != DTYPE_VNODE) {
3820 			error = EBADF;
3821 			goto out;
3822 		}
3823 		vp = (struct vnode *)fp_get_data(fp);
3824 
3825 		return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
3826 	}
3827 
3828 	/*
3829 	 * SPI (private) for unlinking a file starting from a dir fd
3830 	 */
3831 	case F_UNLINKFROM: {
3832 		user_addr_t pathname;
3833 
3834 		/* Check if this isn't a valid file descriptor */
3835 		if ((fp->f_type != DTYPE_VNODE) ||
3836 		    (fp->f_flag & FREAD) == 0) {
3837 			error = EBADF;
3838 			goto out;
3839 		}
3840 		vp = (struct vnode *)fp_get_data(fp);
3841 		proc_fdunlock(p);
3842 
3843 		if (vnode_getwithref(vp)) {
3844 			error = ENOENT;
3845 			goto outdrop;
3846 		}
3847 
3848 		/* Only valid for directories */
3849 		if (vp->v_type != VDIR) {
3850 			vnode_put(vp);
3851 			error = ENOTDIR;
3852 			goto outdrop;
3853 		}
3854 
3855 		/*
3856 		 * Only entitled apps may use the credentials of the thread
3857 		 * that opened the file descriptor.
3858 		 * Non-entitled threads will use their own context.
3859 		 */
3860 		if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
3861 			has_entitlement = 1;
3862 		}
3863 
3864 		/* Get the pathname argument. */
3865 		if (IS_64BIT_PROCESS(p)) {
3866 			pathname = (user_addr_t)argp;
3867 		} else {
3868 			pathname = CAST_USER_ADDR_T(argp);
3869 		}
3870 
3871 		/* Start the lookup relative to the file descriptor's vnode. */
3872 		error = unlink1(has_entitlement ? &context : vfs_context_current(),
3873 		    vp, pathname, UIO_USERSPACE, 0);
3874 
3875 		vnode_put(vp);
3876 		break;
3877 	}
3878 
3879 	case F_ADDSIGS:
3880 	case F_ADDFILESIGS:
3881 	case F_ADDFILESIGS_FOR_DYLD_SIM:
3882 	case F_ADDFILESIGS_RETURN:
3883 	case F_ADDFILESIGS_INFO:
3884 	{
3885 		struct cs_blob *blob = NULL;
3886 		struct user_fsignatures fs;
3887 		kern_return_t kr;
3888 		vm_offset_t kernel_blob_addr;
3889 		vm_size_t kernel_blob_size;
3890 		int blob_add_flags = 0;
3891 		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
3892 		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
3893 		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
3894 
3895 		if (fp->f_type != DTYPE_VNODE) {
3896 			error = EBADF;
3897 			goto out;
3898 		}
3899 		vp = (struct vnode *)fp_get_data(fp);
3900 		proc_fdunlock(p);
3901 
3902 		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3903 			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
3904 			if ((proc_getcsflags(p) & CS_KILL) == 0) {
3905 				proc_lock(p);
3906 				proc_csflags_set(p, CS_KILL);
3907 				proc_unlock(p);
3908 			}
3909 		}
3910 
3911 		error = vnode_getwithref(vp);
3912 		if (error) {
3913 			goto outdrop;
3914 		}
3915 
3916 		if (IS_64BIT_PROCESS(p)) {
3917 			error = copyin(argp, &fs, sizeof_fs);
3918 		} else {
3919 			if (cmd == F_ADDFILESIGS_INFO) {
3920 				error = EINVAL;
3921 				vnode_put(vp);
3922 				goto outdrop;
3923 			}
3924 
3925 			struct user32_fsignatures fs32;
3926 
3927 			error = copyin(argp, &fs32, sizeof(fs32));
3928 			fs.fs_file_start = fs32.fs_file_start;
3929 			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
3930 			fs.fs_blob_size = fs32.fs_blob_size;
3931 		}
3932 
3933 		if (error) {
3934 			vnode_put(vp);
3935 			goto outdrop;
3936 		}
3937 
3938 		/*
3939 		 * First check if we have something loaded at this offset
3940 		 */
3941 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
3942 		if (blob != NULL) {
3943 			/* If this is for dyld_sim revalidate the blob */
3944 			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3945 				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
3946 				if (error) {
3947 					blob = NULL;
3948 					if (error != EAGAIN) {
3949 						vnode_put(vp);
3950 						goto outdrop;
3951 					}
3952 				}
3953 			}
3954 		}
3955 
3956 		if (blob == NULL) {
3957 			/*
3958 			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
3959 			 * our use cases for the immediate future, but note that at the time of this commit, some
3960 			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
3961 			 *
3962 			 * We should consider how we can manage this more effectively; the above means that some
3963 			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
3964 			 * threshold considered ridiculous at the time of this change.
3965 			 */
3966 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
3967 			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
3968 				error = E2BIG;
3969 				vnode_put(vp);
3970 				goto outdrop;
3971 			}
3972 
3973 			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
3974 			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
3975 			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
3976 				error = ENOMEM;
3977 				vnode_put(vp);
3978 				goto outdrop;
3979 			}
3980 
3981 			if (cmd == F_ADDSIGS) {
3982 				error = copyin(fs.fs_blob_start,
3983 				    (void *) kernel_blob_addr,
3984 				    fs.fs_blob_size);
3985 			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
3986 				int resid;
3987 
3988 				error = vn_rdwr(UIO_READ,
3989 				    vp,
3990 				    (caddr_t) kernel_blob_addr,
3991 				    (int)kernel_blob_size,
3992 				    fs.fs_file_start + fs.fs_blob_start,
3993 				    UIO_SYSSPACE,
3994 				    0,
3995 				    kauth_cred_get(),
3996 				    &resid,
3997 				    p);
3998 				if ((error == 0) && resid) {
3999 					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
4000 					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
4001 				}
4002 			}
4003 
4004 			if (error) {
4005 				ubc_cs_blob_deallocate(kernel_blob_addr,
4006 				    kernel_blob_size);
4007 				vnode_put(vp);
4008 				goto outdrop;
4009 			}
4010 
4011 			blob = NULL;
4012 			error = ubc_cs_blob_add(vp,
4013 			    proc_platform(p),
4014 			    CPU_TYPE_ANY,                       /* not for a specific architecture */
4015 			    CPU_SUBTYPE_ANY,
4016 			    fs.fs_file_start,
4017 			    &kernel_blob_addr,
4018 			    kernel_blob_size,
4019 			    NULL,
4020 			    blob_add_flags,
4021 			    &blob);
4022 
4023 			/* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
4024 			if (error) {
4025 				if (kernel_blob_addr) {
4026 					ubc_cs_blob_deallocate(kernel_blob_addr,
4027 					    kernel_blob_size);
4028 				}
4029 				vnode_put(vp);
4030 				goto outdrop;
4031 			} else {
4032 #if CHECK_CS_VALIDATION_BITMAP
4033 				ubc_cs_validation_bitmap_allocate( vp );
4034 #endif
4035 			}
4036 		}
4037 
4038 		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
4039 		    cmd == F_ADDFILESIGS_INFO) {
4040 			/*
4041 			 * The first element of the structure is an
4042 			 * off_t that happens to have the same size for
4043 			 * all archs. Let's overwrite that.
4044 			 */
4045 			off_t end_offset = 0;
4046 			if (blob) {
4047 				end_offset = blob->csb_end_offset;
4048 			}
4049 			error = copyout(&end_offset, argp, sizeof(end_offset));
4050 
4051 			if (error) {
4052 				vnode_put(vp);
4053 				goto outdrop;
4054 			}
4055 		}
4056 
4057 		if (cmd == F_ADDFILESIGS_INFO) {
4058 			/* Return information. What we copy out depends on the size of the
4059 			 * passed in structure, to keep binary compatibility. */
4060 
4061 			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
4062 				// enough room for fs_cdhash[20]+fs_hash_type
4063 
4064 				if (blob != NULL) {
4065 					error = copyout(blob->csb_cdhash,
4066 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
4067 					    USER_FSIGNATURES_CDHASH_LEN);
4068 					if (error) {
4069 						vnode_put(vp);
4070 						goto outdrop;
4071 					}
4072 					int hashtype = cs_hash_type(blob->csb_hashtype);
4073 					error = copyout(&hashtype,
4074 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
4075 					    sizeof(int));
4076 					if (error) {
4077 						vnode_put(vp);
4078 						goto outdrop;
4079 					}
4080 				}
4081 			}
4082 		}
4083 
4084 		(void) vnode_put(vp);
4085 		break;
4086 	}
4087 #if CONFIG_SUPPLEMENTAL_SIGNATURES
4088 	case F_ADDFILESUPPL:
4089 	{
4090 		struct vnode *ivp;
4091 		struct cs_blob *blob = NULL;
4092 		struct user_fsupplement fs;
4093 		int orig_fd;
4094 		struct fileproc* orig_fp = NULL;
4095 		kern_return_t kr;
4096 		vm_offset_t kernel_blob_addr;
4097 		vm_size_t kernel_blob_size;
4098 
4099 		if (!IS_64BIT_PROCESS(p)) {
4100 			error = EINVAL;
4101 			goto out; // drop fp and unlock fds
4102 		}
4103 
4104 		if (fp->f_type != DTYPE_VNODE) {
4105 			error = EBADF;
4106 			goto out;
4107 		}
4108 
4109 		error = copyin(argp, &fs, sizeof(fs));
4110 		if (error) {
4111 			goto out;
4112 		}
4113 
4114 		orig_fd = fs.fs_orig_fd;
4115 		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
4116 			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
4117 			goto out;
4118 		}
4119 
4120 		if (orig_fp->f_type != DTYPE_VNODE) {
4121 			error = EBADF;
4122 			fp_drop(p, orig_fd, orig_fp, 1);
4123 			goto out;
4124 		}
4125 
4126 		ivp = (struct vnode *)fp_get_data(orig_fp);
4127 
4128 		vp = (struct vnode *)fp_get_data(fp);
4129 
4130 		proc_fdunlock(p);
4131 
4132 		error = vnode_getwithref(ivp);
4133 		if (error) {
4134 			fp_drop(p, orig_fd, orig_fp, 0);
4135 			goto outdrop; //drop fp
4136 		}
4137 
4138 		error = vnode_getwithref(vp);
4139 		if (error) {
4140 			vnode_put(ivp);
4141 			fp_drop(p, orig_fd, orig_fp, 0);
4142 			goto outdrop;
4143 		}
4144 
4145 		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
4146 			error = E2BIG;
4147 			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
4148 		}
4149 
4150 		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
4151 		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
4152 		if (kr != KERN_SUCCESS) {
4153 			error = ENOMEM;
4154 			goto dropboth;
4155 		}
4156 
4157 		int resid;
4158 		error = vn_rdwr(UIO_READ, vp,
4159 		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
4160 		    fs.fs_file_start + fs.fs_blob_start,
4161 		    UIO_SYSSPACE, 0,
4162 		    kauth_cred_get(), &resid, p);
4163 		if ((error == 0) && resid) {
4164 			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
4165 			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
4166 		}
4167 
4168 		if (error) {
4169 			ubc_cs_blob_deallocate(kernel_blob_addr,
4170 			    kernel_blob_size);
4171 			goto dropboth;
4172 		}
4173 
4174 		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
4175 		    &kernel_blob_addr, kernel_blob_size, &blob);
4176 
4177 		/* ubc_cs_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
4178 		if (error) {
4179 			if (kernel_blob_addr) {
4180 				ubc_cs_blob_deallocate(kernel_blob_addr,
4181 				    kernel_blob_size);
4182 			}
4183 			goto dropboth;
4184 		}
4185 		vnode_put(ivp);
4186 		vnode_put(vp);
4187 		fp_drop(p, orig_fd, orig_fp, 0);
4188 		break;
4189 
4190 dropboth:
4191 		vnode_put(ivp);
4192 		vnode_put(vp);
4193 		fp_drop(p, orig_fd, orig_fp, 0);
4194 		goto outdrop;
4195 	}
4196 #endif
4197 	case F_GETCODEDIR:
4198 	case F_FINDSIGS: {
4199 		error = ENOTSUP;
4200 		goto out;
4201 	}
4202 	case F_CHECK_LV: {
4203 		struct fileglob *fg;
4204 		fchecklv_t lv = {};
4205 
4206 		if (fp->f_type != DTYPE_VNODE) {
4207 			error = EBADF;
4208 			goto out;
4209 		}
4210 		fg = fp->fp_glob;
4211 		proc_fdunlock(p);
4212 
4213 		if (IS_64BIT_PROCESS(p)) {
4214 			error = copyin(argp, &lv, sizeof(lv));
4215 		} else {
4216 			struct user32_fchecklv lv32 = {};
4217 
4218 			error = copyin(argp, &lv32, sizeof(lv32));
4219 			lv.lv_file_start = lv32.lv_file_start;
4220 			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
4221 			lv.lv_error_message_size = lv32.lv_error_message_size;
4222 		}
4223 		if (error) {
4224 			goto outdrop;
4225 		}
4226 
4227 #if CONFIG_MACF
4228 		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
4229 		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
4230 #endif
4231 
4232 		break;
4233 	}
4234 	case F_GETSIGSINFO: {
4235 		struct cs_blob *blob = NULL;
4236 		fgetsigsinfo_t sigsinfo = {};
4237 
4238 		if (fp->f_type != DTYPE_VNODE) {
4239 			error = EBADF;
4240 			goto out;
4241 		}
4242 		vp = (struct vnode *)fp_get_data(fp);
4243 		proc_fdunlock(p);
4244 
4245 		error = vnode_getwithref(vp);
4246 		if (error) {
4247 			goto outdrop;
4248 		}
4249 
4250 		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
4251 		if (error) {
4252 			vnode_put(vp);
4253 			goto outdrop;
4254 		}
4255 
4256 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
4257 		if (blob == NULL) {
4258 			error = ENOENT;
4259 			vnode_put(vp);
4260 			goto outdrop;
4261 		}
4262 		switch (sigsinfo.fg_info_request) {
4263 		case GETSIGSINFO_PLATFORM_BINARY:
4264 			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
4265 			error = copyout(&sigsinfo.fg_sig_is_platform,
4266 			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
4267 			    sizeof(sigsinfo.fg_sig_is_platform));
4268 			if (error) {
4269 				vnode_put(vp);
4270 				goto outdrop;
4271 			}
4272 			break;
4273 		default:
4274 			error = EINVAL;
4275 			vnode_put(vp);
4276 			goto outdrop;
4277 		}
4278 		vnode_put(vp);
4279 		break;
4280 	}
4281 #if CONFIG_PROTECT
4282 	case F_GETPROTECTIONCLASS: {
4283 		if (fp->f_type != DTYPE_VNODE) {
4284 			error = EBADF;
4285 			goto out;
4286 		}
4287 		vp = (struct vnode *)fp_get_data(fp);
4288 
4289 		proc_fdunlock(p);
4290 
4291 		if (vnode_getwithref(vp)) {
4292 			error = ENOENT;
4293 			goto outdrop;
4294 		}
4295 
4296 		struct vnode_attr va;
4297 
4298 		VATTR_INIT(&va);
4299 		VATTR_WANTED(&va, va_dataprotect_class);
4300 		error = VNOP_GETATTR(vp, &va, &context);
4301 		if (!error) {
4302 			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
4303 				*retval = va.va_dataprotect_class;
4304 			} else {
4305 				error = ENOTSUP;
4306 			}
4307 		}
4308 
4309 		vnode_put(vp);
4310 		break;
4311 	}
4312 
4313 	case F_SETPROTECTIONCLASS: {
4314 		/* tmp must be a valid PROTECTION_CLASS_* */
4315 		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4316 
4317 		if (fp->f_type != DTYPE_VNODE) {
4318 			error = EBADF;
4319 			goto out;
4320 		}
4321 		vp = (struct vnode *)fp_get_data(fp);
4322 
4323 		proc_fdunlock(p);
4324 
4325 		if (vnode_getwithref(vp)) {
4326 			error = ENOENT;
4327 			goto outdrop;
4328 		}
4329 
4330 		/* Only go forward if you have write access */
4331 		vfs_context_t ctx = vfs_context_current();
4332 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4333 			vnode_put(vp);
4334 			error = EBADF;
4335 			goto outdrop;
4336 		}
4337 
4338 		struct vnode_attr va;
4339 
4340 		VATTR_INIT(&va);
4341 		VATTR_SET(&va, va_dataprotect_class, tmp);
4342 
4343 		error = VNOP_SETATTR(vp, &va, ctx);
4344 
4345 		vnode_put(vp);
4346 		break;
4347 	}
4348 
4349 	case F_TRANSCODEKEY: {
4350 		if (fp->f_type != DTYPE_VNODE) {
4351 			error = EBADF;
4352 			goto out;
4353 		}
4354 
4355 		vp = (struct vnode *)fp_get_data(fp);
4356 		proc_fdunlock(p);
4357 
4358 		if (vnode_getwithref(vp)) {
4359 			error = ENOENT;
4360 			goto outdrop;
4361 		}
4362 
4363 		cp_key_t k = {
4364 			.len = CP_MAX_WRAPPEDKEYSIZE,
4365 		};
4366 
4367 		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
4368 		if (k.key == NULL) {
4369 			error = ENOMEM;
4370 		} else {
4371 			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
4372 		}
4373 
4374 		vnode_put(vp);
4375 
4376 		if (error == 0) {
4377 			error = copyout(k.key, argp, k.len);
4378 			*retval = k.len;
4379 		}
4380 		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);
4381 
4382 		break;
4383 	}
4384 
4385 	case F_GETPROTECTIONLEVEL:  {
4386 		if (fp->f_type != DTYPE_VNODE) {
4387 			error = EBADF;
4388 			goto out;
4389 		}
4390 
4391 		vp = (struct vnode*)fp_get_data(fp);
4392 		proc_fdunlock(p);
4393 
4394 		if (vnode_getwithref(vp)) {
4395 			error = ENOENT;
4396 			goto outdrop;
4397 		}
4398 
4399 		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
4400 
4401 		vnode_put(vp);
4402 		break;
4403 	}
4404 
4405 	case F_GETDEFAULTPROTLEVEL:  {
4406 		if (fp->f_type != DTYPE_VNODE) {
4407 			error = EBADF;
4408 			goto out;
4409 		}
4410 
4411 		vp = (struct vnode*)fp_get_data(fp);
4412 		proc_fdunlock(p);
4413 
4414 		if (vnode_getwithref(vp)) {
4415 			error = ENOENT;
4416 			goto outdrop;
4417 		}
4418 
4419 		/*
4420 		 * if cp_get_major_vers fails, error will be set to proper errno
4421 		 * and cp_version will still be 0.
4422 		 */
4423 
4424 		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
4425 
4426 		vnode_put(vp);
4427 		break;
4428 	}
4429 
4430 #endif /* CONFIG_PROTECT */
4431 
4432 	case F_MOVEDATAEXTENTS: {
4433 		struct fileproc *fp2 = NULL;
4434 		struct vnode *src_vp = NULLVP;
4435 		struct vnode *dst_vp = NULLVP;
4436 		/* We need to grab the 2nd FD out of the arguments before moving on. */
4437 		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
4438 
4439 		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
4440 		if (error) {
4441 			goto out;
4442 		}
4443 
4444 		if (fp->f_type != DTYPE_VNODE) {
4445 			error = EBADF;
4446 			goto out;
4447 		}
4448 
4449 		/*
4450 		 * For now, special case HFS+ and APFS only, since this
4451 		 * is SPI.
4452 		 */
4453 		src_vp = (struct vnode *)fp_get_data(fp);
4454 		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
4455 			error = ENOTSUP;
4456 			goto out;
4457 		}
4458 
4459 		/*
4460 		 * Get the references before we start acquiring iocounts on the vnodes,
4461 		 * while we still hold the proc fd lock
4462 		 */
4463 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4464 			error = EBADF;
4465 			goto out;
4466 		}
4467 		if (fp2->f_type != DTYPE_VNODE) {
4468 			fp_drop(p, fd2, fp2, 1);
4469 			error = EBADF;
4470 			goto out;
4471 		}
4472 		dst_vp = (struct vnode *)fp_get_data(fp2);
4473 		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
4474 			fp_drop(p, fd2, fp2, 1);
4475 			error = ENOTSUP;
4476 			goto out;
4477 		}
4478 
4479 #if CONFIG_MACF
4480 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4481 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4482 		if (error) {
4483 			fp_drop(p, fd2, fp2, 1);
4484 			goto out;
4485 		}
4486 #endif
4487 		/* Audit the 2nd FD */
4488 		AUDIT_ARG(fd, fd2);
4489 
4490 		proc_fdunlock(p);
4491 
4492 		if (vnode_getwithref(src_vp)) {
4493 			fp_drop(p, fd2, fp2, 0);
4494 			error = ENOENT;
4495 			goto outdrop;
4496 		}
4497 		if (vnode_getwithref(dst_vp)) {
4498 			vnode_put(src_vp);
4499 			fp_drop(p, fd2, fp2, 0);
4500 			error = ENOENT;
4501 			goto outdrop;
4502 		}
4503 
4504 		/*
4505 		 * Basic asserts; validate they are not the same and that
4506 		 * both live on the same filesystem.
4507 		 */
4508 		if (dst_vp == src_vp) {
4509 			vnode_put(src_vp);
4510 			vnode_put(dst_vp);
4511 			fp_drop(p, fd2, fp2, 0);
4512 			error = EINVAL;
4513 			goto outdrop;
4514 		}
4515 
4516 		if (dst_vp->v_mount != src_vp->v_mount) {
4517 			vnode_put(src_vp);
4518 			vnode_put(dst_vp);
4519 			fp_drop(p, fd2, fp2, 0);
4520 			error = EXDEV;
4521 			goto outdrop;
4522 		}
4523 
4524 		/* Now we have a legit pair of FDs.  Go to work */
4525 
4526 		/* Now check for write access to the target files */
4527 		if (vnode_authorize(src_vp, NULLVP,
4528 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4529 			vnode_put(src_vp);
4530 			vnode_put(dst_vp);
4531 			fp_drop(p, fd2, fp2, 0);
4532 			error = EBADF;
4533 			goto outdrop;
4534 		}
4535 
4536 		if (vnode_authorize(dst_vp, NULLVP,
4537 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4538 			vnode_put(src_vp);
4539 			vnode_put(dst_vp);
4540 			fp_drop(p, fd2, fp2, 0);
4541 			error = EBADF;
4542 			goto outdrop;
4543 		}
4544 
4545 		/* Verify that both vps point to files and not directories */
4546 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4547 			error = EINVAL;
4548 			vnode_put(src_vp);
4549 			vnode_put(dst_vp);
4550 			fp_drop(p, fd2, fp2, 0);
4551 			goto outdrop;
4552 		}
4553 
4554 		/*
4555 		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
4556 		 * We'll pass in our special bit indicating that the new behavior is expected
4557 		 */
4558 
4559 		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
4560 
4561 		vnode_put(src_vp);
4562 		vnode_put(dst_vp);
4563 		fp_drop(p, fd2, fp2, 0);
4564 		break;
4565 	}
4566 
4567 	case F_TRANSFEREXTENTS: {
4568 		struct fileproc *fp2 = NULL;
4569 		struct vnode *src_vp = NULLVP;
4570 		struct vnode *dst_vp = NULLVP;
4571 
4572 		/* Get 2nd FD out of the arguments. */
4573 		int fd2 = CAST_DOWN_EXPLICIT(int, uap->arg);
4574 		if (fd2 < 0) {
4575 			error = EINVAL;
4576 			goto out;
4577 		}
4578 
4579 		if (fp->f_type != DTYPE_VNODE) {
4580 			error = EBADF;
4581 			goto out;
4582 		}
4583 
4584 		/*
4585 		 * Only allow this for APFS
4586 		 */
4587 		src_vp = (struct vnode *)fp_get_data(fp);
4588 		if (src_vp->v_tag != VT_APFS) {
4589 			error = ENOTSUP;
4590 			goto out;
4591 		}
4592 
4593 		/*
4594 		 * Get the references before we start acquiring iocounts on the vnodes,
4595 		 * while we still hold the proc fd lock
4596 		 */
4597 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4598 			error = EBADF;
4599 			goto out;
4600 		}
4601 		if (fp2->f_type != DTYPE_VNODE) {
4602 			fp_drop(p, fd2, fp2, 1);
4603 			error = EBADF;
4604 			goto out;
4605 		}
4606 		dst_vp = (struct vnode *)fp_get_data(fp2);
4607 		if (dst_vp->v_tag != VT_APFS) {
4608 			fp_drop(p, fd2, fp2, 1);
4609 			error = ENOTSUP;
4610 			goto out;
4611 		}
4612 
4613 #if CONFIG_MACF
4614 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4615 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4616 		if (error) {
4617 			fp_drop(p, fd2, fp2, 1);
4618 			goto out;
4619 		}
4620 #endif
4621 		/* Audit the 2nd FD */
4622 		AUDIT_ARG(fd, fd2);
4623 
4624 		proc_fdunlock(p);
4625 
4626 		if (vnode_getwithref(src_vp)) {
4627 			fp_drop(p, fd2, fp2, 0);
4628 			error = ENOENT;
4629 			goto outdrop;
4630 		}
4631 		if (vnode_getwithref(dst_vp)) {
4632 			vnode_put(src_vp);
4633 			fp_drop(p, fd2, fp2, 0);
4634 			error = ENOENT;
4635 			goto outdrop;
4636 		}
4637 
4638 		/*
4639 		 * Validate they are not the same and that
4640 		 * both live on the same filesystem.
4641 		 */
4642 		if (dst_vp == src_vp) {
4643 			vnode_put(src_vp);
4644 			vnode_put(dst_vp);
4645 			fp_drop(p, fd2, fp2, 0);
4646 			error = EINVAL;
4647 			goto outdrop;
4648 		}
4649 		if (dst_vp->v_mount != src_vp->v_mount) {
4650 			vnode_put(src_vp);
4651 			vnode_put(dst_vp);
4652 			fp_drop(p, fd2, fp2, 0);
4653 			error = EXDEV;
4654 			goto outdrop;
4655 		}
4656 
4657 		/* Verify that both vps point to files and not directories */
4658 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4659 			error = EINVAL;
4660 			vnode_put(src_vp);
4661 			vnode_put(dst_vp);
4662 			fp_drop(p, fd2, fp2, 0);
4663 			goto outdrop;
4664 		}
4665 
4666 
4667 		/*
4668 		 * Okay, vps are legit. Check  access.  We'll require write access
4669 		 * to both files.
4670 		 */
4671 		if (vnode_authorize(src_vp, NULLVP,
4672 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4673 			vnode_put(src_vp);
4674 			vnode_put(dst_vp);
4675 			fp_drop(p, fd2, fp2, 0);
4676 			error = EBADF;
4677 			goto outdrop;
4678 		}
4679 		if (vnode_authorize(dst_vp, NULLVP,
4680 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4681 			vnode_put(src_vp);
4682 			vnode_put(dst_vp);
4683 			fp_drop(p, fd2, fp2, 0);
4684 			error = EBADF;
4685 			goto outdrop;
4686 		}
4687 
4688 		/* Pass it on through to the fs */
4689 		error = VNOP_IOCTL(src_vp, cmd, (caddr_t)dst_vp, 0, &context);
4690 
4691 		vnode_put(src_vp);
4692 		vnode_put(dst_vp);
4693 		fp_drop(p, fd2, fp2, 0);
4694 		break;
4695 	}
4696 
4697 	/*
4698 	 * SPI for making a file compressed.
4699 	 */
4700 	case F_MAKECOMPRESSED: {
4701 		uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4702 
4703 		if (fp->f_type != DTYPE_VNODE) {
4704 			error = EBADF;
4705 			goto out;
4706 		}
4707 
4708 		vp = (struct vnode*)fp_get_data(fp);
4709 		proc_fdunlock(p);
4710 
4711 		/* get the vnode */
4712 		if (vnode_getwithref(vp)) {
4713 			error = ENOENT;
4714 			goto outdrop;
4715 		}
4716 
4717 		/* Is it a file? */
4718 		if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
4719 			vnode_put(vp);
4720 			error = EBADF;
4721 			goto outdrop;
4722 		}
4723 
4724 		/* invoke ioctl to pass off to FS */
4725 		/* Only go forward if you have write access */
4726 		vfs_context_t ctx = vfs_context_current();
4727 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4728 			vnode_put(vp);
4729 			error = EBADF;
4730 			goto outdrop;
4731 		}
4732 
4733 		error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
4734 
4735 		vnode_put(vp);
4736 		break;
4737 	}
4738 
4739 	/*
4740 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4741 	 * the open FD will be written to the Fastflow.
4742 	 */
4743 	case F_SET_GREEDY_MODE:
4744 	/* intentionally drop through to the same handler as F_SETSTATIC.
4745 	 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4746 	 */
4747 
4748 	/*
4749 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4750 	 * the open FD will represent static content.
4751 	 */
4752 	case F_SETSTATICCONTENT: {
4753 		caddr_t ioctl_arg = NULL;
4754 
4755 		if (uap->arg) {
4756 			ioctl_arg = (caddr_t) 1;
4757 		}
4758 
4759 		if (fp->f_type != DTYPE_VNODE) {
4760 			error = EBADF;
4761 			goto out;
4762 		}
4763 		vp = (struct vnode *)fp_get_data(fp);
4764 		proc_fdunlock(p);
4765 
4766 		error = vnode_getwithref(vp);
4767 		if (error) {
4768 			error = ENOENT;
4769 			goto outdrop;
4770 		}
4771 
4772 		/* Only go forward if you have write access */
4773 		vfs_context_t ctx = vfs_context_current();
4774 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4775 			vnode_put(vp);
4776 			error = EBADF;
4777 			goto outdrop;
4778 		}
4779 
4780 		error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
4781 		(void)vnode_put(vp);
4782 
4783 		break;
4784 	}
4785 
4786 	/*
4787 	 * SPI (private) for indicating to the lower level storage driver that the
4788 	 * subsequent writes should be of a particular IO type (burst, greedy, static),
4789 	 * or other flavors that may be necessary.
4790 	 */
4791 	case F_SETIOTYPE: {
4792 		caddr_t param_ptr;
4793 		uint32_t param;
4794 
4795 		if (uap->arg) {
4796 			/* extract 32 bits of flags from userland */
4797 			param_ptr = (caddr_t) uap->arg;
4798 			param = (uint32_t) param_ptr;
4799 		} else {
4800 			/* If no argument is specified, error out */
4801 			error = EINVAL;
4802 			goto out;
4803 		}
4804 
4805 		/*
4806 		 * Validate the different types of flags that can be specified:
4807 		 * all of them are mutually exclusive for now.
4808 		 */
4809 		switch (param) {
4810 		case F_IOTYPE_ISOCHRONOUS:
4811 			break;
4812 
4813 		default:
4814 			error = EINVAL;
4815 			goto out;
4816 		}
4817 
4818 
4819 		if (fp->f_type != DTYPE_VNODE) {
4820 			error = EBADF;
4821 			goto out;
4822 		}
4823 		vp = (struct vnode *)fp_get_data(fp);
4824 		proc_fdunlock(p);
4825 
4826 		error = vnode_getwithref(vp);
4827 		if (error) {
4828 			error = ENOENT;
4829 			goto outdrop;
4830 		}
4831 
4832 		/* Only go forward if you have write access */
4833 		vfs_context_t ctx = vfs_context_current();
4834 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4835 			vnode_put(vp);
4836 			error = EBADF;
4837 			goto outdrop;
4838 		}
4839 
4840 		error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
4841 		(void)vnode_put(vp);
4842 
4843 		break;
4844 	}
4845 
4846 	/*
4847 	 * Set the vnode pointed to by 'fd'
4848 	 * and tag it as the (potentially future) backing store
4849 	 * for another filesystem
4850 	 */
4851 	case F_SETBACKINGSTORE: {
4852 		if (fp->f_type != DTYPE_VNODE) {
4853 			error = EBADF;
4854 			goto out;
4855 		}
4856 
4857 		vp = (struct vnode *)fp_get_data(fp);
4858 
4859 		if (vp->v_tag != VT_HFS) {
4860 			error = EINVAL;
4861 			goto out;
4862 		}
4863 		proc_fdunlock(p);
4864 
4865 		if (vnode_getwithref(vp)) {
4866 			error = ENOENT;
4867 			goto outdrop;
4868 		}
4869 
4870 		/* only proceed if you have write access */
4871 		vfs_context_t ctx = vfs_context_current();
4872 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4873 			vnode_put(vp);
4874 			error = EBADF;
4875 			goto outdrop;
4876 		}
4877 
4878 
4879 		/* If arg != 0, set, otherwise unset */
4880 		if (uap->arg) {
4881 			error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
4882 		} else {
4883 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
4884 		}
4885 
4886 		vnode_put(vp);
4887 		break;
4888 	}
4889 
4890 	/*
4891 	 * like F_GETPATH, but special semantics for
4892 	 * the mobile time machine handler.
4893 	 */
4894 	case F_GETPATH_MTMINFO: {
4895 		char *pathbufp;
4896 		int pathlen;
4897 
4898 		if (fp->f_type != DTYPE_VNODE) {
4899 			error = EBADF;
4900 			goto out;
4901 		}
4902 		vp = (struct vnode *)fp_get_data(fp);
4903 		proc_fdunlock(p);
4904 
4905 		pathlen = MAXPATHLEN;
4906 		pathbufp = zalloc(ZV_NAMEI);
4907 
4908 		if ((error = vnode_getwithref(vp)) == 0) {
4909 			int backingstore = 0;
4910 
4911 			/* Check for error from vn_getpath before moving on */
4912 			if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4913 				if (vp->v_tag == VT_HFS) {
4914 					error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4915 				}
4916 				(void)vnode_put(vp);
4917 
4918 				if (error == 0) {
4919 					error = copyout((caddr_t)pathbufp, argp, pathlen);
4920 				}
4921 				if (error == 0) {
4922 					/*
4923 					 * If the copyout was successful, now check to ensure
4924 					 * that this vnode is not a BACKINGSTORE vnode.  mtmd
4925 					 * wants the path regardless.
4926 					 */
4927 					if (backingstore) {
4928 						error = EBUSY;
4929 					}
4930 				}
4931 			} else {
4932 				(void)vnode_put(vp);
4933 			}
4934 		}
4935 
4936 		zfree(ZV_NAMEI, pathbufp);
4937 		goto outdrop;
4938 	}
4939 
4940 	case F_RECYCLE: {
4941 #if !DEBUG && !DEVELOPMENT
4942 		bool allowed = false;
4943 
4944 		//
4945 		// non-debug and non-development kernels have restrictions
4946 		// on who can call this fcntl.  the process has to be marked
4947 		// with the dataless-manipulator entitlement and either the
4948 		// process or thread have to be marked rapid-aging.
4949 		//
4950 		if (!vfs_context_is_dataless_manipulator(&context)) {
4951 			error = EPERM;
4952 			goto out;
4953 		}
4954 
4955 		proc_t proc = vfs_context_proc(&context);
4956 		if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4957 			allowed = true;
4958 		} else {
4959 			thread_t thr = vfs_context_thread(&context);
4960 			if (thr) {
4961 				struct uthread *ut = get_bsdthread_info(thr);
4962 
4963 				if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4964 					allowed = true;
4965 				}
4966 			}
4967 		}
4968 		if (!allowed) {
4969 			error = EPERM;
4970 			goto out;
4971 		}
4972 #endif
4973 
4974 		if (fp->f_type != DTYPE_VNODE) {
4975 			error = EBADF;
4976 			goto out;
4977 		}
4978 		vp = (struct vnode *)fp_get_data(fp);
4979 		proc_fdunlock(p);
4980 
4981 		vnode_recycle(vp);
4982 		break;
4983 	}
4984 
4985 #if CONFIG_FILE_LEASES
4986 	case F_SETLEASE: {
4987 		struct fileglob *fg;
4988 		int fl_type;
4989 		int expcounts;
4990 
4991 		if (fp->f_type != DTYPE_VNODE) {
4992 			error = EBADF;
4993 			goto out;
4994 		}
4995 		vp = (struct vnode *)fp_get_data(fp);
4996 		fg = fp->fp_glob;;
4997 		proc_fdunlock(p);
4998 
4999 		/*
5000 		 * In order to allow a process to avoid breaking
5001 		 * its own leases, the expected open count needs
5002 		 * to be provided to F_SETLEASE when placing write lease.
5003 		 * Similarly, in order to allow a process to place a read lease
5004 		 * after opening the file multiple times in RW mode, the expected
5005 		 * write count needs to be provided to F_SETLEASE when placing a
5006 		 * read lease.
5007 		 *
5008 		 * We use the upper 30 bits of the integer argument (way more than
5009 		 * enough) as the expected open/write count.
5010 		 *
5011 		 * If the caller passed 0 for the expected open count,
5012 		 * assume 1.
5013 		 */
5014 		fl_type = CAST_DOWN_EXPLICIT(int, uap->arg);
5015 		expcounts = (unsigned int)fl_type >> 2;
5016 		fl_type &= 3;
5017 
5018 		if (fl_type == F_WRLCK && expcounts == 0) {
5019 			expcounts = 1;
5020 		}
5021 
5022 		AUDIT_ARG(value32, fl_type);
5023 
5024 		if ((error = vnode_getwithref(vp))) {
5025 			goto outdrop;
5026 		}
5027 
5028 		/*
5029 		 * Only support for regular file/dir mounted on local-based filesystem.
5030 		 */
5031 		if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5032 		    !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
5033 			error = EBADF;
5034 			vnode_put(vp);
5035 			goto outdrop;
5036 		}
5037 
5038 		/* For directory, we only support read lease. */
5039 		if (vnode_vtype(vp) == VDIR && fl_type == F_WRLCK) {
5040 			error = ENOTSUP;
5041 			vnode_put(vp);
5042 			goto outdrop;
5043 		}
5044 
5045 		switch (fl_type) {
5046 		case F_RDLCK:
5047 		case F_WRLCK:
5048 		case F_UNLCK:
5049 			error = vnode_setlease(vp, fg, fl_type, expcounts,
5050 			    vfs_context_current());
5051 			break;
5052 		default:
5053 			error = EINVAL;
5054 			break;
5055 		}
5056 
5057 		vnode_put(vp);
5058 		goto outdrop;
5059 	}
5060 
5061 	case F_GETLEASE: {
5062 		if (fp->f_type != DTYPE_VNODE) {
5063 			error = EBADF;
5064 			goto out;
5065 		}
5066 		vp = (struct vnode *)fp_get_data(fp);
5067 		proc_fdunlock(p);
5068 
5069 		if ((error = vnode_getwithref(vp))) {
5070 			goto outdrop;
5071 		}
5072 
5073 		if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5074 		    !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
5075 			error = EBADF;
5076 			vnode_put(vp);
5077 			goto outdrop;
5078 		}
5079 
5080 		error = 0;
5081 		*retval = vnode_getlease(vp);
5082 		vnode_put(vp);
5083 		goto outdrop;
5084 	}
5085 #endif /* CONFIG_FILE_LEASES */
5086 
5087 	/* SPI (private) for asserting background access to a file */
5088 	case F_ASSERT_BG_ACCESS:
5089 	/* SPI (private) for releasing background access to a file */
5090 	case F_RELEASE_BG_ACCESS: {
5091 		/*
5092 		 * Check if the process is platform code, which means
5093 		 * that it is considered part of the Operating System.
5094 		 */
5095 		if (!csproc_get_platform_binary(p)) {
5096 			error = EPERM;
5097 			goto out;
5098 		}
5099 
5100 		if (fp->f_type != DTYPE_VNODE) {
5101 			error = EBADF;
5102 			goto out;
5103 		}
5104 
5105 		vp = (struct vnode *)fp_get_data(fp);
5106 		proc_fdunlock(p);
5107 
5108 		if (vnode_getwithref(vp)) {
5109 			error = ENOENT;
5110 			goto outdrop;
5111 		}
5112 
5113 		/* Verify that vp points to a file and not a directory */
5114 		if (!vnode_isreg(vp)) {
5115 			vnode_put(vp);
5116 			error = EINVAL;
5117 			goto outdrop;
5118 		}
5119 
5120 		/* Only proceed if you have read access */
5121 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA), &context) != 0) {
5122 			vnode_put(vp);
5123 			error = EBADF;
5124 			goto outdrop;
5125 		}
5126 
5127 		if (cmd == F_ASSERT_BG_ACCESS) {
5128 			fassertbgaccess_t args;
5129 
5130 			if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
5131 				vnode_put(vp);
5132 				goto outdrop;
5133 			}
5134 
5135 			error = VNOP_IOCTL(vp, F_ASSERT_BG_ACCESS, (caddr_t)&args, 0, &context);
5136 		} else {
5137 			// cmd == F_RELEASE_BG_ACCESS
5138 			error = VNOP_IOCTL(vp, F_RELEASE_BG_ACCESS, (caddr_t)NULL, 0, &context);
5139 		}
5140 
5141 		vnode_put(vp);
5142 
5143 		goto outdrop;
5144 	}
5145 
5146 	default:
5147 		/*
5148 		 * This is an fcntl() that we do not recognize at this level;
5149 		 * if this is a vnode, we send it down into the VNOP_IOCTL
5150 		 * for this vnode; this can include special devices, and will
5151 		 * effectively overload fcntl() to send ioctl()'s.
5152 		 */
5153 		if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
5154 			error = EINVAL;
5155 			goto out;
5156 		}
5157 
5158 		/*
5159 		 * Catch any now-invalid fcntl() selectors.
5160 		 * (When adding a selector to this list, it may be prudent
5161 		 * to consider adding it to the list in fsctl_internal() as well.)
5162 		 */
5163 		switch (cmd) {
5164 		case (int)APFSIOC_REVERT_TO_SNAPSHOT:
5165 		case (int)FSIOC_FIOSEEKHOLE:
5166 		case (int)FSIOC_FIOSEEKDATA:
5167 		case (int)FSIOC_CAS_BSDFLAGS:
5168 		case (int)FSIOC_KERNEL_ROOTAUTH:
5169 		case (int)FSIOC_GRAFT_FS:
5170 		case (int)FSIOC_UNGRAFT_FS:
5171 		case (int)FSIOC_AUTH_FS:
5172 		case HFS_GET_BOOT_INFO:
5173 		case HFS_SET_BOOT_INFO:
5174 		case FIOPINSWAP:
5175 		case F_MARKDEPENDENCY:
5176 		case TIOCREVOKE:
5177 		case TIOCREVOKECLEAR:
5178 			error = EINVAL;
5179 			goto out;
5180 		default:
5181 			break;
5182 		}
5183 
5184 		if (fp->f_type != DTYPE_VNODE) {
5185 			error = EBADF;
5186 			goto out;
5187 		}
5188 		vp = (struct vnode *)fp_get_data(fp);
5189 		proc_fdunlock(p);
5190 
5191 		if ((error = vnode_getwithref(vp)) == 0) {
5192 #define STK_PARAMS 128
5193 			char stkbuf[STK_PARAMS] = {0};
5194 			unsigned int size;
5195 			caddr_t data, memp;
5196 			/*
5197 			 * For this to work properly, we have to copy in the
5198 			 * ioctl() cmd argument if there is one; we must also
5199 			 * check that a command parameter, if present, does
5200 			 * not exceed the maximum command length dictated by
5201 			 * the number of bits we have available in the command
5202 			 * to represent a structure length.  Finally, we have
5203 			 * to copy the results back out, if it is that type of
5204 			 * ioctl().
5205 			 */
5206 			size = IOCPARM_LEN(cmd);
5207 			if (size > IOCPARM_MAX) {
5208 				(void)vnode_put(vp);
5209 				error = EINVAL;
5210 				break;
5211 			}
5212 
5213 			memp = NULL;
5214 			if (size > sizeof(stkbuf)) {
5215 				memp = (caddr_t)kalloc_data(size, Z_WAITOK);
5216 				if (memp == 0) {
5217 					(void)vnode_put(vp);
5218 					error = ENOMEM;
5219 					goto outdrop;
5220 				}
5221 				data = memp;
5222 			} else {
5223 				data = &stkbuf[0];
5224 			}
5225 
5226 			if (cmd & IOC_IN) {
5227 				if (size) {
5228 					/* structure */
5229 					error = copyin(argp, data, size);
5230 					if (error) {
5231 						(void)vnode_put(vp);
5232 						if (memp) {
5233 							kfree_data(memp, size);
5234 						}
5235 						goto outdrop;
5236 					}
5237 
5238 					/* Bzero the section beyond that which was needed */
5239 					if (size <= sizeof(stkbuf)) {
5240 						bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
5241 					}
5242 				} else {
5243 					/* int */
5244 					if (is64bit) {
5245 						*(user_addr_t *)data = argp;
5246 					} else {
5247 						*(uint32_t *)data = (uint32_t)argp;
5248 					}
5249 				};
5250 			} else if ((cmd & IOC_OUT) && size) {
5251 				/*
5252 				 * Zero the buffer so the user always
5253 				 * gets back something deterministic.
5254 				 */
5255 				bzero(data, size);
5256 			} else if (cmd & IOC_VOID) {
5257 				if (is64bit) {
5258 					*(user_addr_t *)data = argp;
5259 				} else {
5260 					*(uint32_t *)data = (uint32_t)argp;
5261 				}
5262 			}
5263 
5264 			error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
5265 
5266 			(void)vnode_put(vp);
5267 
5268 			/* Copy any output data to user */
5269 			if (error == 0 && (cmd & IOC_OUT) && size) {
5270 				error = copyout(data, argp, size);
5271 			}
5272 			if (memp) {
5273 				kfree_data(memp, size);
5274 			}
5275 		}
5276 		break;
5277 	}
5278 
5279 outdrop:
5280 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
5281 
5282 out:
5283 	return sys_fcntl_out(p, fd, fp, error);
5284 }
5285 
5286 
5287 /*
5288  * sys_close
5289  *
5290  * Description:	The implementation of the close(2) system call
5291  *
5292  * Parameters:	p			Process in whose per process file table
5293  *					the close is to occur
5294  *		uap->fd			fd to be closed
5295  *		retval			<unused>
5296  *
5297  * Returns:	0			Success
5298  *	fp_lookup:EBADF			Bad file descriptor
5299  *      fp_guard_exception:???          Guarded file descriptor
5300  *	close_internal:EBADF
5301  *	close_internal:???              Anything returnable by a per-fileops
5302  *					close function
5303  */
5304 int
sys_close(proc_t p,struct close_args * uap,__unused int32_t * retval)5305 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
5306 {
5307 	__pthread_testcancel(1);
5308 	return close_nocancel(p, uap->fd);
5309 }
5310 
5311 int
sys_close_nocancel(proc_t p,struct close_nocancel_args * uap,__unused int32_t * retval)5312 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
5313 {
5314 	return close_nocancel(p, uap->fd);
5315 }
5316 
5317 int
close_nocancel(proc_t p,int fd)5318 close_nocancel(proc_t p, int fd)
5319 {
5320 	struct fileproc *fp;
5321 
5322 	AUDIT_SYSCLOSE(p, fd);
5323 
5324 	proc_fdlock(p);
5325 	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
5326 		proc_fdunlock(p);
5327 		return EBADF;
5328 	}
5329 
5330 	if (fp_isguarded(fp, GUARD_CLOSE)) {
5331 		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
5332 		proc_fdunlock(p);
5333 		return error;
5334 	}
5335 
5336 	return fp_close_and_unlock(p, fd, fp, 0);
5337 }
5338 
5339 
5340 /*
5341  * fstat
5342  *
5343  * Description:	Return status information about a file descriptor.
5344  *
5345  * Parameters:	p				The process doing the fstat
5346  *		fd				The fd to stat
5347  *		ub				The user stat buffer
5348  *		xsecurity			The user extended security
5349  *						buffer, or 0 if none
5350  *		xsecurity_size			The size of xsecurity, or 0
5351  *						if no xsecurity
5352  *		isstat64			Flag to indicate 64 bit version
5353  *						for inode size, etc.
5354  *
5355  * Returns:	0				Success
5356  *		EBADF
5357  *		EFAULT
5358  *	fp_lookup:EBADF				Bad file descriptor
5359  *	vnode_getwithref:???
5360  *	copyout:EFAULT
5361  *	vnode_getwithref:???
5362  *	vn_stat:???
5363  *	soo_stat:???
5364  *	pipe_stat:???
5365  *	pshm_stat:???
5366  *	kqueue_stat:???
5367  *
5368  * Notes:	Internal implementation for all other fstat() related
5369  *		functions
5370  *
5371  *		XXX switch on node type is bogus; need a stat in struct
5372  *		XXX fileops instead.
5373  */
5374 static int
fstat(proc_t p,int fd,user_addr_t ub,user_addr_t xsecurity,user_addr_t xsecurity_size,int isstat64)5375 fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
5376     user_addr_t xsecurity_size, int isstat64)
5377 {
5378 	struct fileproc *fp;
5379 	union {
5380 		struct stat sb;
5381 		struct stat64 sb64;
5382 	} source;
5383 	union {
5384 		struct user64_stat user64_sb;
5385 		struct user32_stat user32_sb;
5386 		struct user64_stat64 user64_sb64;
5387 		struct user32_stat64 user32_sb64;
5388 	} dest;
5389 	int error, my_size;
5390 	file_type_t type;
5391 	caddr_t data;
5392 	kauth_filesec_t fsec;
5393 	user_size_t xsecurity_bufsize;
5394 	vfs_context_t ctx = vfs_context_current();
5395 	void * sbptr;
5396 
5397 
5398 	AUDIT_ARG(fd, fd);
5399 
5400 	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
5401 		return error;
5402 	}
5403 	type = fp->f_type;
5404 	data = (caddr_t)fp_get_data(fp);
5405 	fsec = KAUTH_FILESEC_NONE;
5406 
5407 	sbptr = (void *)&source;
5408 
5409 	switch (type) {
5410 	case DTYPE_VNODE:
5411 		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
5412 			/*
5413 			 * If the caller has the file open, and is not
5414 			 * requesting extended security information, we are
5415 			 * going to let them get the basic stat information.
5416 			 */
5417 			if (xsecurity == USER_ADDR_NULL) {
5418 				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
5419 				    fp->fp_glob->fg_cred);
5420 			} else {
5421 				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
5422 			}
5423 
5424 			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
5425 			(void)vnode_put((vnode_t)data);
5426 		}
5427 		break;
5428 
5429 #if SOCKETS
5430 	case DTYPE_SOCKET:
5431 		error = soo_stat((struct socket *)data, sbptr, isstat64);
5432 		break;
5433 #endif /* SOCKETS */
5434 
5435 	case DTYPE_PIPE:
5436 		error = pipe_stat((void *)data, sbptr, isstat64);
5437 		break;
5438 
5439 	case DTYPE_PSXSHM:
5440 		error = pshm_stat((void *)data, sbptr, isstat64);
5441 		break;
5442 
5443 	case DTYPE_KQUEUE:
5444 		error = kqueue_stat((void *)data, sbptr, isstat64, p);
5445 		break;
5446 
5447 	default:
5448 		error = EBADF;
5449 		goto out;
5450 	}
5451 	if (error == 0) {
5452 		caddr_t sbp;
5453 
5454 		if (isstat64 != 0) {
5455 			source.sb64.st_lspare = 0;
5456 			source.sb64.st_qspare[0] = 0LL;
5457 			source.sb64.st_qspare[1] = 0LL;
5458 
5459 			if (IS_64BIT_PROCESS(p)) {
5460 				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5461 				my_size = sizeof(dest.user64_sb64);
5462 				sbp = (caddr_t)&dest.user64_sb64;
5463 			} else {
5464 				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5465 				my_size = sizeof(dest.user32_sb64);
5466 				sbp = (caddr_t)&dest.user32_sb64;
5467 			}
5468 		} else {
5469 			source.sb.st_lspare = 0;
5470 			source.sb.st_qspare[0] = 0LL;
5471 			source.sb.st_qspare[1] = 0LL;
5472 			if (IS_64BIT_PROCESS(p)) {
5473 				munge_user64_stat(&source.sb, &dest.user64_sb);
5474 				my_size = sizeof(dest.user64_sb);
5475 				sbp = (caddr_t)&dest.user64_sb;
5476 			} else {
5477 				munge_user32_stat(&source.sb, &dest.user32_sb);
5478 				my_size = sizeof(dest.user32_sb);
5479 				sbp = (caddr_t)&dest.user32_sb;
5480 			}
5481 		}
5482 
5483 		error = copyout(sbp, ub, my_size);
5484 	}
5485 
5486 	/* caller wants extended security information? */
5487 	if (xsecurity != USER_ADDR_NULL) {
5488 		/* did we get any? */
5489 		if (fsec == KAUTH_FILESEC_NONE) {
5490 			if (susize(xsecurity_size, 0) != 0) {
5491 				error = EFAULT;
5492 				goto out;
5493 			}
5494 		} else {
5495 			/* find the user buffer size */
5496 			xsecurity_bufsize = fusize(xsecurity_size);
5497 
5498 			/* copy out the actual data size */
5499 			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5500 				error = EFAULT;
5501 				goto out;
5502 			}
5503 
5504 			/* if the caller supplied enough room, copy out to it */
5505 			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
5506 				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5507 			}
5508 		}
5509 	}
5510 out:
5511 	fp_drop(p, fd, fp, 0);
5512 	if (fsec != NULL) {
5513 		kauth_filesec_free(fsec);
5514 	}
5515 	return error;
5516 }
5517 
5518 
5519 /*
5520  * sys_fstat_extended
5521  *
5522  * Description:	Extended version of fstat supporting returning extended
5523  *		security information
5524  *
5525  * Parameters:	p				The process doing the fstat
5526  *		uap->fd				The fd to stat
5527  *		uap->ub				The user stat buffer
5528  *		uap->xsecurity			The user extended security
5529  *						buffer, or 0 if none
5530  *		uap->xsecurity_size		The size of xsecurity, or 0
5531  *
5532  * Returns:	0				Success
5533  *		!0				Errno (see fstat)
5534  */
5535 int
sys_fstat_extended(proc_t p,struct fstat_extended_args * uap,__unused int32_t * retval)5536 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
5537 {
5538 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
5539 }
5540 
5541 
5542 /*
5543  * sys_fstat
5544  *
5545  * Description:	Get file status for the file associated with fd
5546  *
5547  * Parameters:	p				The process doing the fstat
5548  *		uap->fd				The fd to stat
5549  *		uap->ub				The user stat buffer
5550  *
5551  * Returns:	0				Success
5552  *		!0				Errno (see fstat)
5553  */
5554 int
sys_fstat(proc_t p,struct fstat_args * uap,__unused int32_t * retval)5555 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
5556 {
5557 	return fstat(p, uap->fd, uap->ub, 0, 0, 0);
5558 }
5559 
5560 
5561 /*
5562  * sys_fstat64_extended
5563  *
5564  * Description:	Extended version of fstat64 supporting returning extended
5565  *		security information
5566  *
5567  * Parameters:	p				The process doing the fstat
5568  *		uap->fd				The fd to stat
5569  *		uap->ub				The user stat buffer
5570  *		uap->xsecurity			The user extended security
5571  *						buffer, or 0 if none
5572  *		uap->xsecurity_size		The size of xsecurity, or 0
5573  *
5574  * Returns:	0				Success
5575  *		!0				Errno (see fstat)
5576  */
5577 int
sys_fstat64_extended(proc_t p,struct fstat64_extended_args * uap,__unused int32_t * retval)5578 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
5579 {
5580 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
5581 }
5582 
5583 
5584 /*
5585  * sys_fstat64
5586  *
5587  * Description:	Get 64 bit version of the file status for the file associated
5588  *		with fd
5589  *
5590  * Parameters:	p				The process doing the fstat
5591  *		uap->fd				The fd to stat
5592  *		uap->ub				The user stat buffer
5593  *
5594  * Returns:	0				Success
5595  *		!0				Errno (see fstat)
5596  */
5597 int
sys_fstat64(proc_t p,struct fstat64_args * uap,__unused int32_t * retval)5598 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
5599 {
5600 	return fstat(p, uap->fd, uap->ub, 0, 0, 1);
5601 }
5602 
5603 
5604 /*
5605  * sys_fpathconf
5606  *
5607  * Description:	Return pathconf information about a file descriptor.
5608  *
5609  * Parameters:	p				Process making the request
5610  *		uap->fd				fd to get information about
5611  *		uap->name			Name of information desired
5612  *		retval				Pointer to the call return area
5613  *
5614  * Returns:	0				Success
5615  *		EINVAL
5616  *	fp_lookup:EBADF				Bad file descriptor
5617  *	vnode_getwithref:???
5618  *	vn_pathconf:???
5619  *
5620  * Implicit returns:
5621  *		*retval (modified)		Returned information (numeric)
5622  */
5623 int
sys_fpathconf(proc_t p,struct fpathconf_args * uap,int32_t * retval)5624 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5625 {
5626 	int fd = uap->fd;
5627 	struct fileproc *fp;
5628 	struct vnode *vp;
5629 	int error = 0;
5630 	file_type_t type;
5631 
5632 
5633 	AUDIT_ARG(fd, uap->fd);
5634 	if ((error = fp_lookup(p, fd, &fp, 0))) {
5635 		return error;
5636 	}
5637 	type = fp->f_type;
5638 
5639 	switch (type) {
5640 	case DTYPE_SOCKET:
5641 		if (uap->name != _PC_PIPE_BUF) {
5642 			error = EINVAL;
5643 			goto out;
5644 		}
5645 		*retval = PIPE_BUF;
5646 		error = 0;
5647 		goto out;
5648 
5649 	case DTYPE_PIPE:
5650 		if (uap->name != _PC_PIPE_BUF) {
5651 			error = EINVAL;
5652 			goto out;
5653 		}
5654 		*retval = PIPE_BUF;
5655 		error = 0;
5656 		goto out;
5657 
5658 	case DTYPE_VNODE:
5659 		vp = (struct vnode *)fp_get_data(fp);
5660 
5661 		if ((error = vnode_getwithref(vp)) == 0) {
5662 			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5663 
5664 			error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5665 
5666 			(void)vnode_put(vp);
5667 		}
5668 		goto out;
5669 
5670 	default:
5671 		error = EINVAL;
5672 		goto out;
5673 	}
5674 	/*NOTREACHED*/
5675 out:
5676 	fp_drop(p, fd, fp, 0);
5677 	return error;
5678 }
5679 
5680 /*
5681  * sys_flock
5682  *
5683  * Description:	Apply an advisory lock on a file descriptor.
5684  *
5685  * Parameters:	p				Process making request
5686  *		uap->fd				fd on which the lock is to be
5687  *						attempted
5688  *		uap->how			(Un)Lock bits, including type
5689  *		retval				Pointer to the call return area
5690  *
5691  * Returns:	0				Success
5692  *	fp_getfvp:EBADF				Bad file descriptor
5693  *	fp_getfvp:ENOTSUP			fd does not refer to a vnode
5694  *	vnode_getwithref:???
5695  *	VNOP_ADVLOCK:???
5696  *
5697  * Implicit returns:
5698  *		*retval (modified)		Size of dtable
5699  *
5700  * Notes:	Just attempt to get a record lock of the requested type on
5701  *		the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5702  */
5703 int
sys_flock(proc_t p,struct flock_args * uap,__unused int32_t * retval)5704 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5705 {
5706 	int fd = uap->fd;
5707 	int how = uap->how;
5708 	struct fileproc *fp;
5709 	struct vnode *vp;
5710 	struct flock lf;
5711 	vfs_context_t ctx = vfs_context_current();
5712 	int error = 0;
5713 
5714 	AUDIT_ARG(fd, uap->fd);
5715 	if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5716 		return error;
5717 	}
5718 	if ((error = vnode_getwithref(vp))) {
5719 		goto out1;
5720 	}
5721 	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5722 
5723 	lf.l_whence = SEEK_SET;
5724 	lf.l_start = 0;
5725 	lf.l_len = 0;
5726 	if (how & LOCK_UN) {
5727 		lf.l_type = F_UNLCK;
5728 		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5729 		goto out;
5730 	}
5731 	if (how & LOCK_EX) {
5732 		lf.l_type = F_WRLCK;
5733 	} else if (how & LOCK_SH) {
5734 		lf.l_type = F_RDLCK;
5735 	} else {
5736 		error = EBADF;
5737 		goto out;
5738 	}
5739 #if CONFIG_MACF
5740 	error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
5741 	if (error) {
5742 		goto out;
5743 	}
5744 #endif
5745 	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5746 	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5747 	    ctx, NULL);
5748 	if (!error) {
5749 		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5750 	}
5751 out:
5752 	(void)vnode_put(vp);
5753 out1:
5754 	fp_drop(p, fd, fp, 0);
5755 	return error;
5756 }
5757 
5758 /*
5759  * sys_fileport_makeport
5760  *
5761  * Description: Obtain a Mach send right for a given file descriptor.
5762  *
5763  * Parameters:	p		Process calling fileport
5764  *              uap->fd		The fd to reference
5765  *              uap->portnamep  User address at which to place port name.
5766  *
5767  * Returns:	0		Success.
5768  *              EBADF		Bad file descriptor.
5769  *              EINVAL		File descriptor had type that cannot be sent, misc. other errors.
5770  *              EFAULT		Address at which to store port name is not valid.
5771  *              EAGAIN		Resource shortage.
5772  *
5773  * Implicit returns:
5774  *		On success, name of send right is stored at user-specified address.
5775  */
5776 int
sys_fileport_makeport(proc_t p,struct fileport_makeport_args * uap,__unused int * retval)5777 sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5778     __unused int *retval)
5779 {
5780 	int err;
5781 	int fd = uap->fd;
5782 	user_addr_t user_portaddr = uap->portnamep;
5783 	struct fileproc *fp = FILEPROC_NULL;
5784 	struct fileglob *fg = NULL;
5785 	ipc_port_t fileport;
5786 	mach_port_name_t name = MACH_PORT_NULL;
5787 
5788 	proc_fdlock(p);
5789 	err = fp_lookup(p, fd, &fp, 1);
5790 	if (err != 0) {
5791 		goto out_unlock;
5792 	}
5793 
5794 	fg = fp->fp_glob;
5795 	if (!fg_sendable(fg)) {
5796 		err = EINVAL;
5797 		goto out_unlock;
5798 	}
5799 
5800 	if (fp_isguarded(fp, GUARD_FILEPORT)) {
5801 		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5802 		goto out_unlock;
5803 	}
5804 
5805 	/* Dropped when port is deallocated */
5806 	fg_ref(p, fg);
5807 
5808 	proc_fdunlock(p);
5809 
5810 	/* Allocate and initialize a port */
5811 	fileport = fileport_alloc(fg);
5812 	if (fileport == IPC_PORT_NULL) {
5813 		fg_drop_live(fg);
5814 		err = EAGAIN;
5815 		goto out;
5816 	}
5817 
5818 	/* Add an entry.  Deallocates port on failure. */
5819 	name = ipc_port_copyout_send(fileport, get_task_ipcspace(proc_task(p)));
5820 	if (!MACH_PORT_VALID(name)) {
5821 		err = EINVAL;
5822 		goto out;
5823 	}
5824 
5825 	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5826 	if (err != 0) {
5827 		goto out;
5828 	}
5829 
5830 	/* Tag the fileglob for debugging purposes */
5831 	lck_mtx_lock_spin(&fg->fg_lock);
5832 	fg->fg_lflags |= FG_PORTMADE;
5833 	lck_mtx_unlock(&fg->fg_lock);
5834 
5835 	fp_drop(p, fd, fp, 0);
5836 
5837 	return 0;
5838 
5839 out_unlock:
5840 	proc_fdunlock(p);
5841 out:
5842 	if (MACH_PORT_VALID(name)) {
5843 		/* Don't care if another thread races us to deallocate the entry */
5844 		(void) mach_port_deallocate(get_task_ipcspace(proc_task(p)), name);
5845 	}
5846 
5847 	if (fp != FILEPROC_NULL) {
5848 		fp_drop(p, fd, fp, 0);
5849 	}
5850 
5851 	return err;
5852 }
5853 
5854 void
fileport_releasefg(struct fileglob * fg)5855 fileport_releasefg(struct fileglob *fg)
5856 {
5857 	(void)fg_drop(PROC_NULL, fg);
5858 }
5859 
5860 /*
5861  * fileport_makefd
5862  *
5863  * Description: Obtain the file descriptor for a given Mach send right.
5864  *
5865  * Returns:	0		Success
5866  *		EINVAL		Invalid Mach port name, or port is not for a file.
5867  *	fdalloc:EMFILE
5868  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5869  *
5870  * Implicit returns:
5871  *		*retval (modified)		The new descriptor
5872  */
5873 int
fileport_makefd(proc_t p,ipc_port_t port,fileproc_flags_t fp_flags,int * retval)5874 fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
5875 {
5876 	struct fileglob *fg;
5877 	struct fileproc *fp = FILEPROC_NULL;
5878 	int fd;
5879 	int err;
5880 
5881 	fg = fileport_port_to_fileglob(port);
5882 	if (fg == NULL) {
5883 		err = EINVAL;
5884 		goto out;
5885 	}
5886 
5887 	fp = fileproc_alloc_init();
5888 
5889 	proc_fdlock(p);
5890 	err = fdalloc(p, 0, &fd);
5891 	if (err != 0) {
5892 		proc_fdunlock(p);
5893 		goto out;
5894 	}
5895 	if (fp_flags) {
5896 		fp->fp_flags |= fp_flags;
5897 	}
5898 
5899 	fp->fp_glob = fg;
5900 	fg_ref(p, fg);
5901 
5902 	procfdtbl_releasefd(p, fd, fp);
5903 	proc_fdunlock(p);
5904 
5905 	*retval = fd;
5906 	err = 0;
5907 out:
5908 	if ((fp != NULL) && (0 != err)) {
5909 		fileproc_free(fp);
5910 	}
5911 
5912 	return err;
5913 }
5914 
5915 /*
5916  * sys_fileport_makefd
5917  *
5918  * Description: Obtain the file descriptor for a given Mach send right.
5919  *
5920  * Parameters:	p		Process calling fileport
5921  *              uap->port	Name of send right to file port.
5922  *
5923  * Returns:	0		Success
5924  *		EINVAL		Invalid Mach port name, or port is not for a file.
5925  *	fdalloc:EMFILE
5926  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5927  *
5928  * Implicit returns:
5929  *		*retval (modified)		The new descriptor
5930  */
5931 int
sys_fileport_makefd(proc_t p,struct fileport_makefd_args * uap,int32_t * retval)5932 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5933 {
5934 	ipc_port_t port = IPC_PORT_NULL;
5935 	mach_port_name_t send = uap->port;
5936 	kern_return_t res;
5937 	int err;
5938 
5939 	res = ipc_object_copyin(get_task_ipcspace(proc_task(p)),
5940 	    send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5941 
5942 	if (res == KERN_SUCCESS) {
5943 		err = fileport_makefd(p, port, FP_CLOEXEC, retval);
5944 	} else {
5945 		err = EINVAL;
5946 	}
5947 
5948 	if (IPC_PORT_NULL != port) {
5949 		ipc_port_release_send(port);
5950 	}
5951 
5952 	return err;
5953 }
5954 
5955 
5956 #pragma mark fileops wrappers
5957 
5958 /*
5959  * fo_read
5960  *
5961  * Description:	Generic fileops read indirected through the fileops pointer
5962  *		in the fileproc structure
5963  *
5964  * Parameters:	fp				fileproc structure pointer
5965  *		uio				user I/O structure pointer
5966  *		flags				FOF_ flags
5967  *		ctx				VFS context for operation
5968  *
5969  * Returns:	0				Success
5970  *		!0				Errno from read
5971  */
5972 int
fo_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5973 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5974 {
5975 	return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5976 }
5977 
5978 int
fo_no_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5979 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5980 {
5981 #pragma unused(fp, uio, flags, ctx)
5982 	return ENXIO;
5983 }
5984 
5985 
5986 /*
5987  * fo_write
5988  *
5989  * Description:	Generic fileops write indirected through the fileops pointer
5990  *		in the fileproc structure
5991  *
5992  * Parameters:	fp				fileproc structure pointer
5993  *		uio				user I/O structure pointer
5994  *		flags				FOF_ flags
5995  *		ctx				VFS context for operation
5996  *
5997  * Returns:	0				Success
5998  *		!0				Errno from write
5999  */
6000 int
fo_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)6001 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6002 {
6003 	return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
6004 }
6005 
6006 int
fo_no_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)6007 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6008 {
6009 #pragma unused(fp, uio, flags, ctx)
6010 	return ENXIO;
6011 }
6012 
6013 
6014 /*
6015  * fo_ioctl
6016  *
6017  * Description:	Generic fileops ioctl indirected through the fileops pointer
6018  *		in the fileproc structure
6019  *
6020  * Parameters:	fp				fileproc structure pointer
6021  *		com				ioctl command
6022  *		data				pointer to internalized copy
6023  *						of user space ioctl command
6024  *						parameter data in kernel space
6025  *		ctx				VFS context for operation
6026  *
6027  * Returns:	0				Success
6028  *		!0				Errno from ioctl
6029  *
6030  * Locks:	The caller is assumed to have held the proc_fdlock; this
6031  *		function releases and reacquires this lock.  If the caller
6032  *		accesses data protected by this lock prior to calling this
6033  *		function, it will need to revalidate/reacquire any cached
6034  *		protected data obtained prior to the call.
6035  */
6036 int
fo_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)6037 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6038 {
6039 	int error;
6040 
6041 	proc_fdunlock(vfs_context_proc(ctx));
6042 	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
6043 	proc_fdlock(vfs_context_proc(ctx));
6044 	return error;
6045 }
6046 
6047 int
fo_no_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)6048 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6049 {
6050 #pragma unused(fp, com, data, ctx)
6051 	return ENOTTY;
6052 }
6053 
6054 
6055 /*
6056  * fo_select
6057  *
6058  * Description:	Generic fileops select indirected through the fileops pointer
6059  *		in the fileproc structure
6060  *
6061  * Parameters:	fp				fileproc structure pointer
6062  *		which				select which
6063  *		wql				pointer to wait queue list
6064  *		ctx				VFS context for operation
6065  *
6066  * Returns:	0				Success
6067  *		!0				Errno from select
6068  */
6069 int
fo_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)6070 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6071 {
6072 	return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
6073 }
6074 
6075 int
fo_no_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)6076 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6077 {
6078 #pragma unused(fp, which, wql, ctx)
6079 	return ENOTSUP;
6080 }
6081 
6082 
6083 /*
6084  * fo_close
6085  *
6086  * Description:	Generic fileops close indirected through the fileops pointer
6087  *		in the fileproc structure
6088  *
6089  * Parameters:	fp				fileproc structure pointer for
6090  *						file to close
6091  *		ctx				VFS context for operation
6092  *
6093  * Returns:	0				Success
6094  *		!0				Errno from close
6095  */
6096 int
fo_close(struct fileglob * fg,vfs_context_t ctx)6097 fo_close(struct fileglob *fg, vfs_context_t ctx)
6098 {
6099 	return (*fg->fg_ops->fo_close)(fg, ctx);
6100 }
6101 
6102 
6103 /*
6104  * fo_drain
6105  *
6106  * Description:	Generic fileops kqueue filter indirected through the fileops
6107  *		pointer in the fileproc structure
6108  *
6109  * Parameters:	fp				fileproc structure pointer
6110  *		ctx				VFS context for operation
6111  *
6112  * Returns:	0				Success
6113  *		!0				errno from drain
6114  */
6115 int
fo_drain(struct fileproc * fp,vfs_context_t ctx)6116 fo_drain(struct fileproc *fp, vfs_context_t ctx)
6117 {
6118 	return (*fp->f_ops->fo_drain)(fp, ctx);
6119 }
6120 
6121 int
fo_no_drain(struct fileproc * fp,vfs_context_t ctx)6122 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
6123 {
6124 #pragma unused(fp, ctx)
6125 	return ENOTSUP;
6126 }
6127 
6128 
6129 /*
6130  * fo_kqfilter
6131  *
6132  * Description:	Generic fileops kqueue filter indirected through the fileops
6133  *		pointer in the fileproc structure
6134  *
6135  * Parameters:	fp				fileproc structure pointer
6136  *		kn				pointer to knote to filter on
6137  *
6138  * Returns:	(kn->kn_flags & EV_ERROR)	error in kn->kn_data
6139  *		0				Filter is not active
6140  *		!0				Filter is active
6141  */
6142 int
fo_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)6143 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6144 {
6145 	return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
6146 }
6147 
6148 int
fo_no_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)6149 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6150 {
6151 #pragma unused(fp, kev)
6152 	knote_set_error(kn, ENOTSUP);
6153 	return 0;
6154 }
6155