xref: /xnu-8792.81.2/bsd/kern/kern_descrip.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
67  */
68 /*
69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70  * support for mandatory and extensible security protections.  This notice
71  * is included in support of clause 2.2 (b) of the Apple Public License,
72  * Version 2.0.
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110 
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116 
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119 
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124 
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129 
130 #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
131 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
132     mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
133 void ipc_port_release_send(ipc_port_t);
134 
135 void fileport_releasefg(struct fileglob *fg);
136 
137 /* flags for fp_close_and_unlock */
138 #define FD_DUP2RESV 1
139 
140 /* We don't want these exported */
141 
142 __private_extern__
143 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
144 
145 /* Conflict wait queue for when selects collide (opaque type) */
146 extern struct waitq select_conflict_queue;
147 
/*
 * Shorthand member paths: each name expands to a field reached through an
 * object's fp_glob pointer, so `fp->f_flag' reads `fp->fp_glob->fg_flag'.
 */
#define f_flag    fp_glob->fg_flag
#define f_type    fp_glob->fg_ops->fo_type
#define f_cred    fp_glob->fg_cred
#define f_ops     fp_glob->fg_ops
#define f_offset  fp_glob->fg_offset
153 
154 ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
155 ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);
156 
/*
 * If you need accounting for KM_OFILETABL consider using
 * KALLOC_HEAP_DEFINE to define a view.
 */
#define KM_OFILETABL KHEAP_DEFAULT

/*
 * Allocation helpers for the open-file table (rdar://88960128).
 *
 * fd_alloc_files(n_files, flags)
 *	Request (n_files * OFILESIZE) bytes from KM_OFILETABL with the given
 *	allocation flags.
 *
 * fd_free_files(files, n_files)
 *	Hand a table sized for n_files entries back to KM_OFILETABL.  The
 *	macro operates on its own `files' argument, so it no longer relies on
 *	the caller's variable happening to be named `ofiles'.
 *
 * n_files is parenthesized in both macros so that an expression argument
 * (for example `a + b') is multiplied by OFILESIZE as a whole.
 */
#define fd_alloc_files(n_files, flags)                         \
	__typed_allocators_ignore_push                          \
	kheap_alloc(KM_OFILETABL, (n_files) * OFILESIZE, flags) \
	__typed_allocators_ignore_pop

#define fd_free_files(files, n_files)                          \
	__typed_allocators_ignore_push                          \
	kheap_free(KM_OFILETABL, files, (n_files) * OFILESIZE)  \
	__typed_allocators_ignore_pop
175 
176 /*
177  * Descriptor management.
178  */
179 int nfiles;                     /* actual number of open files */
180 /*
181  * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
182  */
183 static const struct fileops uninitops;
184 
185 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
186 static LCK_GRP_DECLARE(file_lck_grp, "file");
187 
188 
189 #pragma mark fileglobs
190 
191 /*!
192  * @function fg_free
193  *
194  * @brief
195  * Free a file structure.
196  */
197 static void
fg_free(struct fileglob * fg)198 fg_free(struct fileglob *fg)
199 {
200 	os_atomic_dec(&nfiles, relaxed);
201 
202 	if (fg->fg_vn_data) {
203 		fg_vn_data_free(fg->fg_vn_data);
204 		fg->fg_vn_data = NULL;
205 	}
206 
207 	kauth_cred_t cred = fg->fg_cred;
208 	if (IS_VALID_CRED(cred)) {
209 		kauth_cred_unref(&cred);
210 		fg->fg_cred = NOCRED;
211 	}
212 	lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
213 
214 #if CONFIG_MACF && CONFIG_VNGUARD
215 	vng_file_label_destroy(fg);
216 #endif
217 	zfree(fg_zone, fg);
218 }
219 
220 OS_ALWAYS_INLINE
221 void
fg_ref(proc_t p,struct fileglob * fg)222 fg_ref(proc_t p, struct fileglob *fg)
223 {
224 #if DEBUG || DEVELOPMENT
225 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
226 #else
227 	(void)p;
228 #endif
229 	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
230 }
231 
232 void
fg_drop_live(struct fileglob * fg)233 fg_drop_live(struct fileglob *fg)
234 {
235 	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
236 }
237 
238 int
fg_drop(proc_t p,struct fileglob * fg)239 fg_drop(proc_t p, struct fileglob *fg)
240 {
241 	struct vnode *vp;
242 	struct vfs_context context;
243 	int error = 0;
244 
245 	if (fg == NULL) {
246 		return 0;
247 	}
248 
249 	/* Set up context with cred stashed in fg */
250 	if (p == current_proc()) {
251 		context.vc_thread = current_thread();
252 	} else {
253 		context.vc_thread = NULL;
254 	}
255 	context.vc_ucred = fg->fg_cred;
256 
257 	/*
258 	 * POSIX record locking dictates that any close releases ALL
259 	 * locks owned by this process.  This is handled by setting
260 	 * a flag in the unlock to free ONLY locks obeying POSIX
261 	 * semantics, and not to free BSD-style file locks.
262 	 * If the descriptor was in a message, POSIX-style locks
263 	 * aren't passed with the descriptor.
264 	 */
265 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
266 	    (p->p_ladvflag & P_LADVLOCK)) {
267 		struct flock lf = {
268 			.l_whence = SEEK_SET,
269 			.l_type = F_UNLCK,
270 		};
271 
272 		vp = (struct vnode *)fg_get_data(fg);
273 		if ((error = vnode_getwithref(vp)) == 0) {
274 			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
275 			(void)vnode_put(vp);
276 		}
277 	}
278 
279 	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
280 		/*
281 		 * Since we ensure that fg->fg_ops is always initialized,
282 		 * it is safe to invoke fo_close on the fg
283 		 */
284 		error = fo_close(fg, &context);
285 
286 		fg_free(fg);
287 	}
288 
289 	return error;
290 }
291 
292 inline
293 void
fg_set_data(struct fileglob * fg,void * fg_data)294 fg_set_data(
295 	struct fileglob *fg,
296 	void *fg_data)
297 {
298 	uintptr_t *store = &fg->fg_data;
299 
300 #if __has_feature(ptrauth_calls)
301 	int type = FILEGLOB_DTYPE(fg);
302 
303 	if (fg_data) {
304 		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
305 		fg_data = ptrauth_sign_unauthenticated(fg_data,
306 		    ptrauth_key_process_independent_data,
307 		    ptrauth_blend_discriminator(store, type));
308 	}
309 #endif // __has_feature(ptrauth_calls)
310 
311 	*store = (uintptr_t)fg_data;
312 }
313 
314 inline
315 void *
fg_get_data_volatile(struct fileglob * fg)316 fg_get_data_volatile(struct fileglob *fg)
317 {
318 	uintptr_t *store = &fg->fg_data;
319 	void *fg_data = (void *)*store;
320 
321 #if __has_feature(ptrauth_calls)
322 	int type = FILEGLOB_DTYPE(fg);
323 
324 	if (fg_data) {
325 		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
326 		fg_data = ptrauth_auth_data(fg_data,
327 		    ptrauth_key_process_independent_data,
328 		    ptrauth_blend_discriminator(store, type));
329 	}
330 #endif // __has_feature(ptrauth_calls)
331 
332 	return fg_data;
333 }
334 
335 static void
fg_transfer_filelocks(proc_t p,struct fileglob * fg,thread_t thread)336 fg_transfer_filelocks(proc_t p, struct fileglob *fg, thread_t thread)
337 {
338 	struct vnode *vp;
339 	struct vfs_context context;
340 	struct proc *old_proc = current_proc();
341 
342 	assert(fg != NULL);
343 
344 	assert(p != old_proc);
345 	context.vc_thread = thread;
346 	context.vc_ucred = fg->fg_cred;
347 
348 	/* Transfer all POSIX Style locks to new proc */
349 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
350 	    (p->p_ladvflag & P_LADVLOCK)) {
351 		struct flock lf = {
352 			.l_whence = SEEK_SET,
353 			.l_start = 0,
354 			.l_len = 0,
355 			.l_type = F_TRANSFER,
356 		};
357 
358 		vp = (struct vnode *)fg_get_data(fg);
359 		if (vnode_getwithref(vp) == 0) {
360 			(void)VNOP_ADVLOCK(vp, (caddr_t)old_proc, F_TRANSFER, &lf, F_POSIX, &context, NULL);
361 			(void)vnode_put(vp);
362 		}
363 	}
364 
365 	/* Transfer all OFD Style locks to new proc */
366 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
367 	    (fg->fg_lflags & FG_HAS_OFDLOCK)) {
368 		struct flock lf = {
369 			.l_whence = SEEK_SET,
370 			.l_start = 0,
371 			.l_len = 0,
372 			.l_type = F_TRANSFER,
373 		};
374 
375 		vp = (struct vnode *)fg_get_data(fg);
376 		if (vnode_getwithref(vp) == 0) {
377 			(void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_TRANSFER, &lf, F_OFD_LOCK, &context, NULL);
378 			(void)vnode_put(vp);
379 		}
380 	}
381 	return;
382 }
383 
384 bool
fg_sendable(struct fileglob * fg)385 fg_sendable(struct fileglob *fg)
386 {
387 	switch (FILEGLOB_DTYPE(fg)) {
388 	case DTYPE_VNODE:
389 	case DTYPE_SOCKET:
390 	case DTYPE_PIPE:
391 	case DTYPE_PSXSHM:
392 	case DTYPE_NETPOLICY:
393 		return (fg->fg_lflags & FG_CONFINED) == 0;
394 
395 	default:
396 		return false;
397 	}
398 }
399 
400 #pragma mark file descriptor table (static helpers)
401 
402 static void
procfdtbl_reservefd(struct proc * p,int fd)403 procfdtbl_reservefd(struct proc * p, int fd)
404 {
405 	p->p_fd.fd_ofiles[fd] = NULL;
406 	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
407 }
408 
409 void
procfdtbl_releasefd(struct proc * p,int fd,struct fileproc * fp)410 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
411 {
412 	if (fp != NULL) {
413 		p->p_fd.fd_ofiles[fd] = fp;
414 	}
415 	p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
416 	if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
417 		p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
418 		wakeup(&p->p_fd);
419 	}
420 }
421 
422 static void
procfdtbl_waitfd(struct proc * p,int fd)423 procfdtbl_waitfd(struct proc * p, int fd)
424 {
425 	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
426 	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
427 }
428 
429 static void
procfdtbl_clearfd(struct proc * p,int fd)430 procfdtbl_clearfd(struct proc * p, int fd)
431 {
432 	int waiting;
433 
434 	waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
435 	p->p_fd.fd_ofiles[fd] = NULL;
436 	p->p_fd.fd_ofileflags[fd] = 0;
437 	if (waiting == UF_RESVWAIT) {
438 		wakeup(&p->p_fd);
439 	}
440 }
441 
442 /*
443  * fdrelse
444  *
445  * Description:	Inline utility function to free an fd in a filedesc
446  *
447  * Parameters:	fdp				Pointer to filedesc fd lies in
448  *		fd				fd to free
449  *		reserv				fd should be reserved
450  *
451  * Returns:	void
452  *
453  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
454  *		the caller
455  */
456 void
fdrelse(struct proc * p,int fd)457 fdrelse(struct proc * p, int fd)
458 {
459 	struct filedesc *fdp = &p->p_fd;
460 	int nfd = 0;
461 
462 	if (fd < fdp->fd_freefile) {
463 		fdp->fd_freefile = fd;
464 	}
465 #if DIAGNOSTIC
466 	if (fd >= fdp->fd_afterlast) {
467 		panic("fdrelse: fd_afterlast inconsistent");
468 	}
469 #endif
470 	procfdtbl_clearfd(p, fd);
471 
472 	nfd = fdp->fd_afterlast;
473 	while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
474 	    !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
475 		nfd--;
476 	}
477 	fdp->fd_afterlast = nfd;
478 
479 #if CONFIG_PROC_RESOURCE_LIMITS
480 	fdp->fd_nfiles_open--;
481 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
482 }
483 
484 
485 /*
486  * finishdup
487  *
488  * Description:	Common code for dup, dup2, and fcntl(F_DUPFD).
489  *
490  * Parameters:	p				Process performing the dup
491  *		old				The fd to dup
492  *		new				The fd to dup it to
493  *		fp_flags			Flags to augment the new fp
494  *		retval				Pointer to the call return area
495  *
496  * Returns:	0				Success
497  *		EBADF
498  *		ENOMEM
499  *
500  * Implicit returns:
501  *		*retval (modified)		The new descriptor
502  *
503  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
504  *		the caller
505  *
506  * Notes:	This function may drop and reacquire this lock; it is unsafe
507  *		for a caller to assume that other state protected by the lock
508  *		has not been subsequently changed out from under it.
509  */
510 static int
finishdup(proc_t p,struct filedesc * fdp,int old,int new,fileproc_flags_t fp_flags,int32_t * retval)511 finishdup(proc_t p, struct filedesc *fdp, int old, int new,
512     fileproc_flags_t fp_flags, int32_t *retval)
513 {
514 	struct fileproc *nfp;
515 	struct fileproc *ofp;
516 #if CONFIG_MACF
517 	int error;
518 	kauth_cred_t cred;
519 #endif
520 
521 #if DIAGNOSTIC
522 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
523 #endif
524 	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
525 	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
526 		fdrelse(p, new);
527 		return EBADF;
528 	}
529 
530 #if CONFIG_MACF
531 	cred = kauth_cred_proc_ref(p);
532 	error = mac_file_check_dup(cred, ofp->fp_glob, new);
533 	kauth_cred_unref(&cred);
534 
535 	if (error) {
536 		fdrelse(p, new);
537 		return error;
538 	}
539 #endif
540 
541 	fg_ref(p, ofp->fp_glob);
542 
543 	proc_fdunlock(p);
544 
545 	nfp = fileproc_alloc_init();
546 
547 	if (fp_flags) {
548 		nfp->fp_flags |= fp_flags;
549 	}
550 	nfp->fp_glob = ofp->fp_glob;
551 
552 	proc_fdlock(p);
553 
554 #if DIAGNOSTIC
555 	if (fdp->fd_ofiles[new] != 0) {
556 		panic("finishdup: overwriting fd_ofiles with new %d", new);
557 	}
558 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
559 		panic("finishdup: unreserved fileflags with new %d", new);
560 	}
561 #endif
562 
563 	if (new >= fdp->fd_afterlast) {
564 		fdp->fd_afterlast = new + 1;
565 	}
566 	procfdtbl_releasefd(p, new, nfp);
567 	*retval = new;
568 	return 0;
569 }
570 
571 
572 #pragma mark file descriptor table (exported functions)
573 
574 void
proc_dirs_lock_shared(proc_t p)575 proc_dirs_lock_shared(proc_t p)
576 {
577 	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
578 }
579 
580 void
proc_dirs_unlock_shared(proc_t p)581 proc_dirs_unlock_shared(proc_t p)
582 {
583 	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
584 }
585 
586 void
proc_dirs_lock_exclusive(proc_t p)587 proc_dirs_lock_exclusive(proc_t p)
588 {
589 	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
590 }
591 
592 void
proc_dirs_unlock_exclusive(proc_t p)593 proc_dirs_unlock_exclusive(proc_t p)
594 {
595 	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
596 }
597 
598 /*
599  * proc_fdlock, proc_fdlock_spin
600  *
601  * Description:	Lock to control access to the per process struct fileproc
602  *		and struct filedesc
603  *
604  * Parameters:	p				Process to take the lock on
605  *
606  * Returns:	void
607  *
608  * Notes:	The lock is initialized in forkproc() and destroyed in
609  *		reap_child_process().
610  */
611 void
proc_fdlock(proc_t p)612 proc_fdlock(proc_t p)
613 {
614 	lck_mtx_lock(&p->p_fd.fd_lock);
615 }
616 
617 void
proc_fdlock_spin(proc_t p)618 proc_fdlock_spin(proc_t p)
619 {
620 	lck_mtx_lock_spin(&p->p_fd.fd_lock);
621 }
622 
623 void
proc_fdlock_assert(proc_t p,int assertflags)624 proc_fdlock_assert(proc_t p, int assertflags)
625 {
626 	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
627 }
628 
629 
630 /*
631  * proc_fdunlock
632  *
633  * Description:	Unlock the lock previously locked by a call to proc_fdlock()
634  *
635  * Parameters:	p				Process to drop the lock on
636  *
637  * Returns:	void
638  */
639 void
proc_fdunlock(proc_t p)640 proc_fdunlock(proc_t p)
641 {
642 	lck_mtx_unlock(&p->p_fd.fd_lock);
643 }
644 
645 bool
fdt_available_locked(proc_t p,int n)646 fdt_available_locked(proc_t p, int n)
647 {
648 	struct filedesc *fdp = &p->p_fd;
649 	struct fileproc **fpp;
650 	char *flags;
651 	int i;
652 	int lim = proc_limitgetcur_nofile(p);
653 
654 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
655 		return true;
656 	}
657 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
658 	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
659 	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
660 		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
661 			return true;
662 		}
663 	}
664 	return false;
665 }
666 
667 
668 struct fdt_iterator
fdt_next(proc_t p,int fd,bool only_settled)669 fdt_next(proc_t p, int fd, bool only_settled)
670 {
671 	struct fdt_iterator it;
672 	struct filedesc *fdp = &p->p_fd;
673 	struct fileproc *fp;
674 	int nfds = fdp->fd_afterlast;
675 
676 	while (++fd < nfds) {
677 		fp = fdp->fd_ofiles[fd];
678 		if (fp == NULL || fp->fp_glob == NULL) {
679 			continue;
680 		}
681 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
682 			continue;
683 		}
684 		it.fdti_fd = fd;
685 		it.fdti_fp = fp;
686 		return it;
687 	}
688 
689 	it.fdti_fd = nfds;
690 	it.fdti_fp = NULL;
691 	return it;
692 }
693 
694 struct fdt_iterator
fdt_prev(proc_t p,int fd,bool only_settled)695 fdt_prev(proc_t p, int fd, bool only_settled)
696 {
697 	struct fdt_iterator it;
698 	struct filedesc *fdp = &p->p_fd;
699 	struct fileproc *fp;
700 
701 	while (--fd >= 0) {
702 		fp = fdp->fd_ofiles[fd];
703 		if (fp == NULL || fp->fp_glob == NULL) {
704 			continue;
705 		}
706 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
707 			continue;
708 		}
709 		it.fdti_fd = fd;
710 		it.fdti_fp = fp;
711 		return it;
712 	}
713 
714 	it.fdti_fd = -1;
715 	it.fdti_fp = NULL;
716 	return it;
717 }
718 
719 void
fdt_init(proc_t p)720 fdt_init(proc_t p)
721 {
722 	struct filedesc *fdp = &p->p_fd;
723 
724 	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
725 	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
726 	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
727 	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
728 }
729 
730 void
fdt_destroy(proc_t p)731 fdt_destroy(proc_t p)
732 {
733 	struct filedesc *fdp = &p->p_fd;
734 
735 	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
736 	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
737 	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
738 	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
739 }
740 
741 void
fdt_exec(proc_t p,short posix_spawn_flags,thread_t thread,bool in_exec)742 fdt_exec(proc_t p, short posix_spawn_flags, thread_t thread, bool in_exec)
743 {
744 	struct filedesc *fdp = &p->p_fd;
745 	thread_t self = current_thread();
746 	struct uthread *ut = get_bsdthread_info(self);
747 	struct kqworkq *dealloc_kqwq = NULL;
748 
749 	/*
750 	 * If the current thread is bound as a workq/workloop
751 	 * servicing thread, we need to unbind it first.
752 	 */
753 	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
754 		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
755 	}
756 
757 	/*
758 	 * Deallocate the knotes for this process
759 	 * and mark the tables non-existent so
760 	 * subsequent kqueue closes go faster.
761 	 */
762 	knotes_dealloc(p);
763 	assert(fdp->fd_knlistsize == 0);
764 	assert(fdp->fd_knhashmask == 0);
765 
766 	proc_fdlock(p);
767 
768 	/* Set the P_LADVLOCK flag if the flag set on old proc */
769 	if (in_exec && (current_proc()->p_ladvflag & P_LADVLOCK)) {
770 		os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
771 	}
772 
773 	for (int i = fdp->fd_afterlast; i-- > 0;) {
774 		struct fileproc *fp = fdp->fd_ofiles[i];
775 		char *flagp = &fdp->fd_ofileflags[i];
776 		bool inherit_file = true;
777 
778 		if (fp == FILEPROC_NULL) {
779 			continue;
780 		}
781 
782 		/*
783 		 * no file descriptor should be in flux when in exec,
784 		 * because we stopped all other threads
785 		 */
786 		if (*flagp & ~UF_INHERIT) {
787 			panic("file %d/%p in flux during exec of %p", i, fp, p);
788 		}
789 
790 		if (fp->fp_flags & FP_CLOEXEC) {
791 			inherit_file = false;
792 		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
793 		    !(*flagp & UF_INHERIT)) {
794 			/*
795 			 * Reverse the usual semantics of file descriptor
796 			 * inheritance - all of them should be closed
797 			 * except files marked explicitly as "inherit" and
798 			 * not marked close-on-exec.
799 			 */
800 			inherit_file = false;
801 #if CONFIG_MACF
802 		} else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
803 			inherit_file = false;
804 #endif
805 		}
806 
807 		*flagp = 0; /* clear UF_INHERIT */
808 
809 		if (!inherit_file) {
810 			fp_close_and_unlock(p, i, fp, 0);
811 			proc_fdlock(p);
812 		} else if (in_exec) {
813 			/* Transfer F_POSIX style lock to new proc */
814 			proc_fdunlock(p);
815 			fg_transfer_filelocks(p, fp->fp_glob, thread);
816 			proc_fdlock(p);
817 		}
818 	}
819 
820 	/* release the per-process workq kq */
821 	if (fdp->fd_wqkqueue) {
822 		dealloc_kqwq = fdp->fd_wqkqueue;
823 		fdp->fd_wqkqueue = NULL;
824 	}
825 
826 	proc_fdunlock(p);
827 
828 	/* Anything to free? */
829 	if (dealloc_kqwq) {
830 		kqworkq_dealloc(dealloc_kqwq);
831 	}
832 }
833 
834 
835 int
fdt_fork(struct filedesc * newfdp,proc_t p,vnode_t uth_cdir,bool in_exec)836 fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir, bool in_exec)
837 {
838 	struct filedesc *fdp = &p->p_fd;
839 	struct fileproc **ofiles;
840 	char *ofileflags;
841 	int n_files, afterlast, freefile;
842 	vnode_t v_dir;
843 #if CONFIG_PROC_RESOURCE_LIMITS
844 	int fd_nfiles_open = 0;
845 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
846 	proc_fdlock(p);
847 
848 	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
849 	newfdp->fd_cmask = fdp->fd_cmask;
850 #if CONFIG_PROC_RESOURCE_LIMITS
851 	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
852 	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
853 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
854 
855 	/*
856 	 * For both fd_cdir and fd_rdir make sure we get
857 	 * a valid reference... if we can't, than set
858 	 * set the pointer(s) to NULL in the child... this
859 	 * will keep us from using a non-referenced vp
860 	 * and allows us to do the vnode_rele only on
861 	 * a properly referenced vp
862 	 */
863 	if ((v_dir = fdp->fd_rdir)) {
864 		if (vnode_getwithref(v_dir) == 0) {
865 			if (vnode_ref(v_dir) == 0) {
866 				newfdp->fd_rdir = v_dir;
867 			}
868 			vnode_put(v_dir);
869 		}
870 		if (newfdp->fd_rdir == NULL) {
871 			/*
872 			 * We couldn't get a new reference on
873 			 * the chroot directory being
874 			 * inherited... this is fatal, since
875 			 * otherwise it would constitute an
876 			 * escape from a chroot environment by
877 			 * the new process.
878 			 */
879 			proc_fdunlock(p);
880 			return EPERM;
881 		}
882 	}
883 
884 	/*
885 	 * If we are running with per-thread current working directories,
886 	 * inherit the new current working directory from the current thread.
887 	 */
888 	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
889 		if (vnode_getwithref(v_dir) == 0) {
890 			if (vnode_ref(v_dir) == 0) {
891 				newfdp->fd_cdir = v_dir;
892 			}
893 			vnode_put(v_dir);
894 		}
895 		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
896 			/*
897 			 * we couldn't get a new reference on
898 			 * the current working directory being
899 			 * inherited... we might as well drop
900 			 * our reference from the parent also
901 			 * since the vnode has gone DEAD making
902 			 * it useless... by dropping it we'll
903 			 * be that much closer to recycling it
904 			 */
905 			vnode_rele(fdp->fd_cdir);
906 			fdp->fd_cdir = NULL;
907 		}
908 	}
909 
910 	/*
911 	 * If the number of open files fits in the internal arrays
912 	 * of the open file structure, use them, otherwise allocate
913 	 * additional memory for the number of descriptors currently
914 	 * in use.
915 	 */
916 	afterlast = fdp->fd_afterlast;
917 	freefile = fdp->fd_freefile;
918 	if (afterlast <= NDFILE) {
919 		n_files = NDFILE;
920 	} else {
921 		n_files = roundup(afterlast, NDEXTENT);
922 	}
923 
924 	proc_fdunlock(p);
925 
926 	ofiles = fd_alloc_files(n_files, Z_WAITOK | Z_ZERO);
927 	if (ofiles == NULL) {
928 		if (newfdp->fd_cdir) {
929 			vnode_rele(newfdp->fd_cdir);
930 			newfdp->fd_cdir = NULL;
931 		}
932 		if (newfdp->fd_rdir) {
933 			vnode_rele(newfdp->fd_rdir);
934 			newfdp->fd_rdir = NULL;
935 		}
936 		return ENOMEM;
937 	}
938 	ofileflags = (char *)&ofiles[n_files];
939 
940 	proc_fdlock(p);
941 
942 	for (int i = afterlast; i-- > 0;) {
943 		struct fileproc *ofp, *nfp;
944 		char flags;
945 
946 		ofp = fdp->fd_ofiles[i];
947 		flags = fdp->fd_ofileflags[i];
948 
949 		if (ofp == NULL ||
950 		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
951 		    ((ofp->fp_flags & FP_CLOFORK) && !in_exec) ||
952 		    ((ofp->fp_flags & FP_CLOEXEC) && in_exec) ||
953 		    (flags & UF_RESERVED)) {
954 			if (i + 1 == afterlast) {
955 				afterlast = i;
956 			}
957 			if (i < freefile) {
958 				freefile = i;
959 			}
960 
961 			continue;
962 		}
963 
964 		nfp = fileproc_alloc_init();
965 		nfp->fp_glob = ofp->fp_glob;
966 		if (in_exec) {
967 			nfp->fp_flags = (ofp->fp_flags & (FP_CLOEXEC | FP_CLOFORK));
968 			if (ofp->fp_guard_attrs) {
969 				guarded_fileproc_copy_guard(ofp, nfp);
970 			}
971 		} else {
972 			assert(ofp->fp_guard_attrs == 0);
973 			nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
974 		}
975 		fg_ref(p, nfp->fp_glob);
976 
977 		ofiles[i] = nfp;
978 #if CONFIG_PROC_RESOURCE_LIMITS
979 		fd_nfiles_open++;
980 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
981 	}
982 
983 	proc_fdunlock(p);
984 
985 	newfdp->fd_ofiles = ofiles;
986 	newfdp->fd_ofileflags = ofileflags;
987 	newfdp->fd_nfiles = n_files;
988 	newfdp->fd_afterlast = afterlast;
989 	newfdp->fd_freefile = freefile;
990 
991 #if CONFIG_PROC_RESOURCE_LIMITS
992 	newfdp->fd_nfiles_open = fd_nfiles_open;
993 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
994 
995 	return 0;
996 }
997 
998 void
fdt_invalidate(proc_t p)999 fdt_invalidate(proc_t p)
1000 {
1001 	struct filedesc *fdp = &p->p_fd;
1002 	struct fileproc *fp, **ofiles;
1003 	struct kqworkq *kqwq = NULL;
1004 	vnode_t vn1 = NULL, vn2 = NULL;
1005 	struct kqwllist *kqhash = NULL;
1006 	u_long kqhashmask = 0;
1007 	int n_files = 0;
1008 
1009 	/*
1010 	 * deallocate all the knotes up front and claim empty
1011 	 * tables to make any subsequent kqueue closes faster.
1012 	 */
1013 	knotes_dealloc(p);
1014 	assert(fdp->fd_knlistsize == 0);
1015 	assert(fdp->fd_knhashmask == 0);
1016 
1017 	/*
1018 	 * dealloc all workloops that have outstanding retains
1019 	 * when created with scheduling parameters.
1020 	 */
1021 	kqworkloops_dealloc(p);
1022 
1023 	proc_fdlock(p);
1024 
1025 	/* close file descriptors */
1026 	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
1027 		for (int i = fdp->fd_afterlast; i-- > 0;) {
1028 			if ((fp = fdp->fd_ofiles[i]) != NULL) {
1029 				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
1030 					panic("fdfree: found fp with UF_RESERVED");
1031 				}
1032 				fp_close_and_unlock(p, i, fp, 0);
1033 				proc_fdlock(p);
1034 			}
1035 		}
1036 	}
1037 
1038 	n_files = fdp->fd_nfiles;
1039 	ofiles = fdp->fd_ofiles;
1040 	kqwq = fdp->fd_wqkqueue;
1041 	vn1 = fdp->fd_cdir;
1042 	vn2 = fdp->fd_rdir;
1043 
1044 	fdp->fd_ofileflags = NULL;
1045 	fdp->fd_ofiles = NULL;
1046 	fdp->fd_nfiles = 0;
1047 	fdp->fd_wqkqueue = NULL;
1048 	fdp->fd_cdir = NULL;
1049 	fdp->fd_rdir = NULL;
1050 
1051 	proc_fdunlock(p);
1052 
1053 	lck_mtx_lock(&fdp->fd_knhashlock);
1054 
1055 	kqhash = fdp->fd_kqhash;
1056 	kqhashmask = fdp->fd_kqhashmask;
1057 
1058 	fdp->fd_kqhash = 0;
1059 	fdp->fd_kqhashmask = 0;
1060 
1061 	lck_mtx_unlock(&fdp->fd_knhashlock);
1062 
1063 	fd_free_files(ofiles, n_files);
1064 
1065 	if (kqwq) {
1066 		kqworkq_dealloc(kqwq);
1067 	}
1068 	if (vn1) {
1069 		vnode_rele(vn1);
1070 	}
1071 	if (vn2) {
1072 		vnode_rele(vn2);
1073 	}
1074 	if (kqhash) {
1075 		for (uint32_t i = 0; i <= kqhashmask; i++) {
1076 			assert(LIST_EMPTY(&kqhash[i]));
1077 		}
1078 		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
1079 	}
1080 }
1081 
1082 
1083 struct fileproc *
fileproc_alloc_init(void)1084 fileproc_alloc_init(void)
1085 {
1086 	struct fileproc *fp;
1087 
1088 	fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1089 	os_ref_init(&fp->fp_iocount, &f_refgrp);
1090 	return fp;
1091 }
1092 
1093 
1094 void
fileproc_free(struct fileproc * fp)1095 fileproc_free(struct fileproc *fp)
1096 {
1097 	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
1098 #if DEVELOPMENT || DEBUG
1099 	if (0 != refc) {
1100 		panic("%s: pid %d refc: %u != 0",
1101 		    __func__, proc_pid(current_proc()), refc);
1102 	}
1103 #endif
1104 	if (fp->fp_guard_attrs) {
1105 		guarded_fileproc_unguard(fp);
1106 	}
1107 	assert(fp->fp_wset == NULL);
1108 	zfree_id(ZONE_ID_FILEPROC, fp);
1109 }
1110 
1111 
/*
 * Statistics counter: the number of times a call to fdalloc() has
 * expanded a per-process open file table.  This single counter is
 * shared by all processes.
 *
 * XXX This would likely be of more use if it were per process
 */
1118 int fdexpand;
1119 
#if CONFIG_PROC_RESOURCE_LIMITS
/*
 * fd_check_limit_exceeded
 *
 * Description:	Compare the number of open descriptors (fd_nfiles_open)
 *		against the soft and hard limits recorded in the filedesc.
 *		When a limit is exceeded, record the notification on the
 *		filedesc and request the process-resource AST on the
 *		current thread.
 *
 * Parameters:	fdp				File descriptor table to check
 *
 * Returns:	void
 *
 * Locks:	Should be called only with the proc_fdlock held.
 *
 * Notes:	A limit of zero disables the corresponding check.  A
 *		notification is only sent while the matching
 *		fd_above_{soft,hard}_limit_notify() test is false
 *		(presumably meaning it has not been sent yet -- confirm
 *		against those helpers).  The hard limit is examined only
 *		when the soft-limit branch is not taken, so at most one
 *		notification is sent per call.
 */
void
fd_check_limit_exceeded(struct filedesc *fdp)
{
#if DIAGNOSTIC
	/*
	 * Only the filedesc is in scope here (there is no proc pointer),
	 * so assert directly on the descriptor table lock.
	 */
	LCK_MTX_ASSERT(&fdp->fd_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
	    (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
		fd_above_soft_limit_send_notification(fdp);
		act_set_astproc_resource(current_thread());
	} else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
	    (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
		fd_above_hard_limit_send_notification(fdp);
		act_set_astproc_resource(current_thread());
	}
}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
1141 
1142 /*
1143  * fdalloc
1144  *
1145  * Description:	Allocate a file descriptor for the process.
1146  *
1147  * Parameters:	p				Process to allocate the fd in
1148  *		want				The fd we would prefer to get
1149  *		result				Pointer to fd we got
1150  *
1151  * Returns:	0				Success
1152  *		EMFILE
1153  *		ENOMEM
1154  *
1155  * Implicit returns:
1156  *		*result (modified)		The fd which was allocated
1157  */
1158 int
fdalloc(proc_t p,int want,int * result)1159 fdalloc(proc_t p, int want, int *result)
1160 {
1161 	struct filedesc *fdp = &p->p_fd;
1162 	int i;
1163 	int last, numfiles, oldnfiles;
1164 	struct fileproc **newofiles, **ofiles;
1165 	char *newofileflags;
1166 	int lim = proc_limitgetcur_nofile(p);
1167 
1168 	/*
1169 	 * Search for a free descriptor starting at the higher
1170 	 * of want or fd_freefile.  If that fails, consider
1171 	 * expanding the ofile array.
1172 	 */
1173 #if DIAGNOSTIC
1174 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1175 #endif
1176 
1177 	for (;;) {
1178 		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
1179 		if ((i = want) < fdp->fd_freefile) {
1180 			i = fdp->fd_freefile;
1181 		}
1182 		for (; i < last; i++) {
1183 			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
1184 				procfdtbl_reservefd(p, i);
1185 				if (i >= fdp->fd_afterlast) {
1186 					fdp->fd_afterlast = i + 1;
1187 				}
1188 				if (want <= fdp->fd_freefile) {
1189 					fdp->fd_freefile = i;
1190 				}
1191 				*result = i;
1192 #if CONFIG_PROC_RESOURCE_LIMITS
1193 				fdp->fd_nfiles_open++;
1194 				fd_check_limit_exceeded(fdp);
1195 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1196 				return 0;
1197 			}
1198 		}
1199 
1200 		/*
1201 		 * No space in current array.  Expand?
1202 		 */
1203 		if ((rlim_t)fdp->fd_nfiles >= lim) {
1204 			return EMFILE;
1205 		}
1206 		if (fdp->fd_nfiles < NDEXTENT) {
1207 			numfiles = NDEXTENT;
1208 		} else {
1209 			numfiles = 2 * fdp->fd_nfiles;
1210 		}
1211 		/* Enforce lim */
1212 		if ((rlim_t)numfiles > lim) {
1213 			numfiles = (int)lim;
1214 		}
1215 		proc_fdunlock(p);
1216 		newofiles = fd_alloc_files(numfiles, Z_WAITOK);
1217 		proc_fdlock(p);
1218 		if (newofiles == NULL) {
1219 			return ENOMEM;
1220 		}
1221 		if (fdp->fd_nfiles >= numfiles) {
1222 			fd_free_files(newofiles, numfiles);
1223 			continue;
1224 		}
1225 		newofileflags = (char *) &newofiles[numfiles];
1226 		/*
1227 		 * Copy the existing ofile and ofileflags arrays
1228 		 * and zero the new portion of each array.
1229 		 */
1230 		oldnfiles = fdp->fd_nfiles;
1231 		(void) memcpy(newofiles, fdp->fd_ofiles,
1232 		    oldnfiles * sizeof(*fdp->fd_ofiles));
1233 		(void) memset(&newofiles[oldnfiles], 0,
1234 		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
1235 
1236 		(void) memcpy(newofileflags, fdp->fd_ofileflags,
1237 		    oldnfiles * sizeof(*fdp->fd_ofileflags));
1238 		(void) memset(&newofileflags[oldnfiles], 0,
1239 		    (numfiles - oldnfiles) *
1240 		    sizeof(*fdp->fd_ofileflags));
1241 		ofiles = fdp->fd_ofiles;
1242 		fdp->fd_ofiles = newofiles;
1243 		fdp->fd_ofileflags = newofileflags;
1244 		fdp->fd_nfiles = numfiles;
1245 		fd_free_files(ofiles, oldnfiles);
1246 		fdexpand++;
1247 	}
1248 }
1249 
1250 
1251 #pragma mark fileprocs
1252 
1253 void
fileproc_modify_vflags(struct fileproc * fp,fileproc_vflags_t vflags,boolean_t clearflags)1254 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1255 {
1256 	if (clearflags) {
1257 		os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1258 	} else {
1259 		os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1260 	}
1261 }
1262 
1263 fileproc_vflags_t
fileproc_get_vflags(struct fileproc * fp)1264 fileproc_get_vflags(struct fileproc *fp)
1265 {
1266 	return os_atomic_load(&fp->fp_vflags, relaxed);
1267 }
1268 
1269 /*
1270  * falloc_withinit
1271  *
1272  * Create a new open file structure and allocate
1273  * a file descriptor for the process that refers to it.
1274  *
1275  * Returns:	0			Success
1276  *
1277  * Description:	Allocate an entry in the per process open file table and
1278  *		return the corresponding fileproc and fd.
1279  *
1280  * Parameters:	p				The process in whose open file
1281  *						table the fd is to be allocated
1282  *		resultfp			Pointer to fileproc pointer
1283  *						return area
1284  *		resultfd			Pointer to fd return area
1285  *		ctx				VFS context
1286  *		fp_zalloc			fileproc allocator to use
1287  *		crarg				allocator args
1288  *
1289  * Returns:	0				Success
1290  *		ENFILE				Too many open files in system
1291  *		fdalloc:EMFILE			Too many open files in process
1292  *		fdalloc:ENOMEM			M_OFILETABL zone exhausted
1293  *		ENOMEM				fp_zone or fg_zone zone
1294  *						exhausted
1295  *
1296  * Implicit returns:
1297  *		*resultfd (modified)		Returned fileproc pointer
1298  *		*resultfd (modified)		Returned fd
1299  *
1300  * Notes:	This function takes separate process and context arguments
1301  *		solely to support kern_exec.c; otherwise, it would take
1302  *		neither, and use the vfs_context_current() routine internally.
1303  */
1304 int
falloc_withinit(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx,fp_initfn_t fp_init,void * initarg)1305 falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
1306     vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
1307 {
1308 	struct fileproc *fp;
1309 	struct fileglob *fg;
1310 	int error, nfd;
1311 #if CONFIG_MACF
1312 	kauth_cred_t cred;
1313 #endif
1314 
1315 	/* Make sure we don't go beyond the system-wide limit */
1316 	if (nfiles >= maxfiles) {
1317 		tablefull("file");
1318 		return ENFILE;
1319 	}
1320 
1321 	proc_fdlock(p);
1322 
1323 	/* fdalloc will make sure the process stays below per-process limit */
1324 	if ((error = fdalloc(p, 0, &nfd))) {
1325 		proc_fdunlock(p);
1326 		return error;
1327 	}
1328 
1329 #if CONFIG_MACF
1330 	cred = kauth_cred_proc_ref(p);
1331 	error = mac_file_check_create(cred);
1332 	kauth_cred_unref(&cred);
1333 	if (error) {
1334 		proc_fdunlock(p);
1335 		return error;
1336 	}
1337 #endif
1338 
1339 	/*
1340 	 * Allocate a new file descriptor.
1341 	 * If the process has file descriptor zero open, add to the list
1342 	 * of open files at that point, otherwise put it at the front of
1343 	 * the list of open files.
1344 	 */
1345 	proc_fdunlock(p);
1346 
1347 	fp = fileproc_alloc_init();
1348 	if (fp_init) {
1349 		fp_init(fp, initarg);
1350 	}
1351 
1352 	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
1353 	lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
1354 
1355 	os_ref_retain_locked(&fp->fp_iocount);
1356 	os_ref_init_raw(&fg->fg_count, &f_refgrp);
1357 	fg->fg_ops = &uninitops;
1358 	fp->fp_glob = fg;
1359 
1360 	kauth_cred_ref(ctx->vc_ucred);
1361 
1362 	fp->f_cred = ctx->vc_ucred;
1363 
1364 	os_atomic_inc(&nfiles, relaxed);
1365 
1366 	proc_fdlock(p);
1367 
1368 	p->p_fd.fd_ofiles[nfd] = fp;
1369 
1370 	proc_fdunlock(p);
1371 
1372 	if (resultfp) {
1373 		*resultfp = fp;
1374 	}
1375 	if (resultfd) {
1376 		*resultfd = nfd;
1377 	}
1378 
1379 	return 0;
1380 }
1381 
1382 int
falloc(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx)1383 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
1384 {
1385 	return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
1386 }
1387 
1388 
1389 /*
1390  * fp_free
1391  *
1392  * Description:	Release the fd and free the fileproc associated with the fd
1393  *		in the per process open file table of the specified process;
1394  *		these values must correspond.
1395  *
1396  * Parameters:	p				Process containing fd
1397  *		fd				fd to be released
1398  *		fp				fileproc to be freed
1399  */
1400 void
fp_free(proc_t p,int fd,struct fileproc * fp)1401 fp_free(proc_t p, int fd, struct fileproc * fp)
1402 {
1403 	proc_fdlock_spin(p);
1404 	fdrelse(p, fd);
1405 	proc_fdunlock(p);
1406 
1407 	fg_free(fp->fp_glob);
1408 	os_ref_release_live(&fp->fp_iocount);
1409 	fileproc_free(fp);
1410 }
1411 
1412 
1413 struct fileproc *
fp_get_noref_locked(proc_t p,int fd)1414 fp_get_noref_locked(proc_t p, int fd)
1415 {
1416 	struct filedesc *fdp = &p->p_fd;
1417 	struct fileproc *fp;
1418 
1419 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1420 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1421 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1422 		return NULL;
1423 	}
1424 
1425 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1426 	return fp;
1427 }
1428 
1429 struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p,int fd)1430 fp_get_noref_locked_with_iocount(proc_t p, int fd)
1431 {
1432 	struct filedesc *fdp = &p->p_fd;
1433 	struct fileproc *fp = NULL;
1434 
1435 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1436 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1437 	    os_ref_get_count(&fp->fp_iocount) <= 1 ||
1438 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1439 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
1440 		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
1441 		    __func__, fd, fp);
1442 	}
1443 
1444 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1445 	return fp;
1446 }
1447 
1448 
1449 /*
1450  * fp_lookup
1451  *
1452  * Description:	Get fileproc pointer for a given fd from the per process
1453  *		open file table of the specified process and if successful,
1454  *		increment the fp_iocount
1455  *
1456  * Parameters:	p				Process in which fd lives
1457  *		fd				fd to get information for
1458  *		resultfp			Pointer to result fileproc
1459  *						pointer area, or 0 if none
1460  *		locked				!0 if the caller holds the
1461  *						proc_fdlock, 0 otherwise
1462  *
1463  * Returns:	0			Success
1464  *		EBADF			Bad file descriptor
1465  *
1466  * Implicit returns:
1467  *		*resultfp (modified)		Fileproc pointer
1468  *
1469  * Locks:	If the argument 'locked' is non-zero, then the caller is
1470  *		expected to have taken and held the proc_fdlock; if it is
1471  *		zero, than this routine internally takes and drops this lock.
1472  */
1473 int
fp_lookup(proc_t p,int fd,struct fileproc ** resultfp,int locked)1474 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1475 {
1476 	struct filedesc *fdp = &p->p_fd;
1477 	struct fileproc *fp;
1478 
1479 	if (!locked) {
1480 		proc_fdlock_spin(p);
1481 	}
1482 	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1483 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1484 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1485 		if (!locked) {
1486 			proc_fdunlock(p);
1487 		}
1488 		return EBADF;
1489 	}
1490 
1491 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1492 	os_ref_retain_locked(&fp->fp_iocount);
1493 
1494 	if (resultfp) {
1495 		*resultfp = fp;
1496 	}
1497 	if (!locked) {
1498 		proc_fdunlock(p);
1499 	}
1500 
1501 	return 0;
1502 }
1503 
1504 
1505 int
fp_get_ftype(proc_t p,int fd,file_type_t ftype,int err,struct fileproc ** fpp)1506 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1507 {
1508 	struct filedesc *fdp = &p->p_fd;
1509 	struct fileproc *fp;
1510 
1511 	proc_fdlock_spin(p);
1512 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1513 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1514 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1515 		proc_fdunlock(p);
1516 		return EBADF;
1517 	}
1518 
1519 	if (fp->f_type != ftype) {
1520 		proc_fdunlock(p);
1521 		return err;
1522 	}
1523 
1524 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1525 	os_ref_retain_locked(&fp->fp_iocount);
1526 	proc_fdunlock(p);
1527 
1528 	*fpp = fp;
1529 	return 0;
1530 }
1531 
1532 
1533 /*
1534  * fp_drop
1535  *
1536  * Description:	Drop the I/O reference previously taken by calling fp_lookup
1537  *		et. al.
1538  *
1539  * Parameters:	p				Process in which the fd lives
1540  *		fd				fd associated with the fileproc
1541  *		fp				fileproc on which to set the
1542  *						flag and drop the reference
1543  *		locked				flag to internally take and
1544  *						drop proc_fdlock if it is not
1545  *						already held by the caller
1546  *
1547  * Returns:	0				Success
1548  *		EBADF				Bad file descriptor
1549  *
1550  * Locks:	This function internally takes and drops the proc_fdlock for
1551  *		the supplied process if 'locked' is non-zero, and assumes that
1552  *		the caller already holds this lock if 'locked' is non-zero.
1553  *
1554  * Notes:	The fileproc must correspond to the fd in the supplied proc
1555  */
1556 int
fp_drop(proc_t p,int fd,struct fileproc * fp,int locked)1557 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
1558 {
1559 	struct filedesc *fdp = &p->p_fd;
1560 	int     needwakeup = 0;
1561 
1562 	if (!locked) {
1563 		proc_fdlock_spin(p);
1564 	}
1565 	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
1566 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1567 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1568 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
1569 		if (!locked) {
1570 			proc_fdunlock(p);
1571 		}
1572 		return EBADF;
1573 	}
1574 
1575 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
1576 		if (fp->fp_flags & FP_SELCONFLICT) {
1577 			fp->fp_flags &= ~FP_SELCONFLICT;
1578 		}
1579 
1580 		if (fdp->fd_fpdrainwait) {
1581 			fdp->fd_fpdrainwait = 0;
1582 			needwakeup = 1;
1583 		}
1584 	}
1585 	if (!locked) {
1586 		proc_fdunlock(p);
1587 	}
1588 	if (needwakeup) {
1589 		wakeup(&fdp->fd_fpdrainwait);
1590 	}
1591 
1592 	return 0;
1593 }
1594 
1595 
1596 /*
1597  * fileproc_drain
1598  *
1599  * Description:	Drain out pending I/O operations
1600  *
1601  * Parameters:	p				Process closing this file
1602  *		fp				fileproc struct for the open
1603  *						instance on the file
1604  *
1605  * Returns:	void
1606  *
1607  * Locks:	Assumes the caller holds the proc_fdlock
1608  *
1609  * Notes:	For character devices, this occurs on the last close of the
1610  *		device; for all other file descriptors, this occurs on each
1611  *		close to prevent fd's from being closed out from under
1612  *		operations currently in progress and blocked
1613  *
1614  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
1615  *		regarding their use and interaction with this function.
1616  */
1617 static void
fileproc_drain(proc_t p,struct fileproc * fp)1618 fileproc_drain(proc_t p, struct fileproc * fp)
1619 {
1620 	struct filedesc *fdp = &p->p_fd;
1621 	struct vfs_context context;
1622 	thread_t thread;
1623 	bool is_current_proc;
1624 
1625 	is_current_proc = (p == current_proc());
1626 
1627 	if (!is_current_proc) {
1628 		proc_lock(p);
1629 		thread = proc_thread(p); /* XXX */
1630 		thread_reference(thread);
1631 		proc_unlock(p);
1632 	} else {
1633 		thread = current_thread();
1634 	}
1635 
1636 	context.vc_thread = thread;
1637 	context.vc_ucred = fp->fp_glob->fg_cred;
1638 
1639 	/* Set the vflag for drain */
1640 	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
1641 
1642 	while (os_ref_get_count(&fp->fp_iocount) > 1) {
1643 		lck_mtx_convert_spin(&fdp->fd_lock);
1644 
1645 		fo_drain(fp, &context);
1646 		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
1647 			struct select_set *selset;
1648 
1649 			if (fp->fp_guard_attrs) {
1650 				selset = fp->fp_guard->fpg_wset;
1651 			} else {
1652 				selset = fp->fp_wset;
1653 			}
1654 			if (waitq_wakeup64_all(selset, NO_EVENT64,
1655 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
1656 				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
1657 				    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
1658 			}
1659 		}
1660 		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1661 			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
1662 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
1663 				panic("bad select_conflict_queue");
1664 			}
1665 		}
1666 		fdp->fd_fpdrainwait = 1;
1667 		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
1668 	}
1669 #if DIAGNOSTIC
1670 	if ((fp->fp_flags & FP_INSELECT) != 0) {
1671 		panic("FP_INSELECT set on drained fp");
1672 	}
1673 #endif
1674 	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1675 		fp->fp_flags &= ~FP_SELCONFLICT;
1676 	}
1677 
1678 	if (!is_current_proc) {
1679 		thread_deallocate(thread);
1680 	}
1681 }
1682 
1683 
1684 int
fp_close_and_unlock(proc_t p,int fd,struct fileproc * fp,int flags)1685 fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
1686 {
1687 	struct filedesc *fdp = &p->p_fd;
1688 	struct fileglob *fg = fp->fp_glob;
1689 #if CONFIG_MACF
1690 	kauth_cred_t cred;
1691 #endif
1692 
1693 #if DIAGNOSTIC
1694 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1695 #endif
1696 
1697 	/*
1698 	 * Keep most people from finding the filedesc while we are closing it.
1699 	 *
1700 	 * Callers are:
1701 	 *
1702 	 * - dup2() which always waits for UF_RESERVED to clear
1703 	 *
1704 	 * - close/guarded_close/... who will fail the fileproc lookup if
1705 	 *   UF_RESERVED is set,
1706 	 *
1707 	 * - fdexec()/fdfree() who only run once all threads in the proc
1708 	 *   are properly canceled, hence no fileproc in this proc should
1709 	 *   be in flux.
1710 	 *
1711 	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
1712 	 *
1713 	 * Callers of fp_get_noref_locked_with_iocount() can still find
1714 	 * this entry so that they can drop their I/O reference despite
1715 	 * not having remembered the fileproc pointer (namely select() and
1716 	 * file_drop()).
1717 	 */
1718 	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
1719 		panic("%s: called with fileproc in flux (%d/:%p)",
1720 		    __func__, fd, fp);
1721 	}
1722 	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
1723 
1724 	if ((fp->fp_flags & FP_AIOISSUED) ||
1725 #if CONFIG_MACF
1726 	    (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
1727 #else
1728 	    kauth_authorize_fileop_has_listeners()
1729 #endif
1730 	    ) {
1731 		proc_fdunlock(p);
1732 
1733 		if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1734 			/*
1735 			 * call out to allow 3rd party notification of close.
1736 			 * Ignore result of kauth_authorize_fileop call.
1737 			 */
1738 #if CONFIG_MACF
1739 			cred = kauth_cred_proc_ref(p);
1740 			mac_file_notify_close(cred, fp->fp_glob);
1741 			kauth_cred_unref(&cred);
1742 #endif
1743 
1744 			if (kauth_authorize_fileop_has_listeners() &&
1745 			    vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
1746 				u_int   fileop_flags = 0;
1747 				if (fg->fg_flag & FWASWRITTEN) {
1748 					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
1749 				}
1750 				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
1751 				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);
1752 
1753 				vnode_put((vnode_t)fg_get_data(fg));
1754 			}
1755 		}
1756 
1757 		if (fp->fp_flags & FP_AIOISSUED) {
1758 			/*
1759 			 * cancel all async IO requests that can be cancelled.
1760 			 */
1761 			_aio_close( p, fd );
1762 		}
1763 
1764 		proc_fdlock(p);
1765 	}
1766 
1767 	if (fd < fdp->fd_knlistsize) {
1768 		knote_fdclose(p, fd);
1769 	}
1770 
1771 	fileproc_drain(p, fp);
1772 
1773 	if (flags & FD_DUP2RESV) {
1774 		fdp->fd_ofiles[fd] = NULL;
1775 		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
1776 	} else {
1777 		fdrelse(p, fd);
1778 	}
1779 
1780 	proc_fdunlock(p);
1781 
1782 	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
1783 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
1784 		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
1785 	}
1786 
1787 	fileproc_free(fp);
1788 
1789 	return fg_drop(p, fg);
1790 }
1791 
1792 /*
1793  * dupfdopen
1794  *
1795  * Description:	Duplicate the specified descriptor to a free descriptor;
1796  *		this is the second half of fdopen(), above.
1797  *
1798  * Parameters:	p				current process pointer
1799  *		indx				fd to dup to
1800  *		dfd				fd to dup from
1801  *		mode				mode to set on new fd
1802  *		error				command code
1803  *
1804  * Returns:	0				Success
1805  *		EBADF				Source fd is bad
1806  *		EACCES				Requested mode not allowed
1807  *		!0				'error', if not ENODEV or
1808  *						ENXIO
1809  *
1810  * Notes:	XXX This is not thread safe; see fdopen() above
1811  */
1812 int
dupfdopen(proc_t p,int indx,int dfd,int flags,int error)1813 dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
1814 {
1815 	struct filedesc *fdp = &p->p_fd;
1816 	struct fileproc *wfp;
1817 	struct fileproc *fp;
1818 #if CONFIG_MACF
1819 	int myerror;
1820 #endif
1821 
1822 	/*
1823 	 * If the to-be-dup'd fd number is greater than the allowed number
1824 	 * of file descriptors, or the fd to be dup'd has already been
1825 	 * closed, reject.  Note, check for new == old is necessary as
1826 	 * falloc could allocate an already closed to-be-dup'd descriptor
1827 	 * as the new descriptor.
1828 	 */
1829 	proc_fdlock(p);
1830 
1831 	fp = fdp->fd_ofiles[indx];
1832 	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
1833 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
1834 	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
1835 		proc_fdunlock(p);
1836 		return EBADF;
1837 	}
1838 #if CONFIG_MACF
1839 	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
1840 	if (myerror) {
1841 		proc_fdunlock(p);
1842 		return myerror;
1843 	}
1844 #endif
1845 	/*
1846 	 * There are two cases of interest here.
1847 	 *
1848 	 * For ENODEV simply dup (dfd) to file descriptor
1849 	 * (indx) and return.
1850 	 *
1851 	 * For ENXIO steal away the file structure from (dfd) and
1852 	 * store it in (indx).  (dfd) is effectively closed by
1853 	 * this operation.
1854 	 *
1855 	 * Any other error code is just returned.
1856 	 */
1857 	switch (error) {
1858 	case ENODEV:
1859 		if (fp_isguarded(wfp, GUARD_DUP)) {
1860 			proc_fdunlock(p);
1861 			return EPERM;
1862 		}
1863 
1864 		/*
1865 		 * Check that the mode the file is being opened for is a
1866 		 * subset of the mode of the existing descriptor.
1867 		 */
1868 		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1869 			proc_fdunlock(p);
1870 			return EACCES;
1871 		}
1872 		if (indx >= fdp->fd_afterlast) {
1873 			fdp->fd_afterlast = indx + 1;
1874 		}
1875 
1876 		if (fp->fp_glob) {
1877 			fg_free(fp->fp_glob);
1878 		}
1879 		fg_ref(p, wfp->fp_glob);
1880 		fp->fp_glob = wfp->fp_glob;
1881 		/*
1882 		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
1883 		 * unlike dup(), dup2() or fcntl(F_DUPFD).
1884 		 *
1885 		 * open1() already handled O_CLO{EXEC,FORK}
1886 		 */
1887 		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));
1888 
1889 		procfdtbl_releasefd(p, indx, NULL);
1890 		fp_drop(p, indx, fp, 1);
1891 		proc_fdunlock(p);
1892 		return 0;
1893 
1894 	default:
1895 		proc_fdunlock(p);
1896 		return error;
1897 	}
1898 	/* NOTREACHED */
1899 }
1900 
1901 
1902 #pragma mark KPIS (sys/file.h)
1903 
1904 /*
1905  * fg_get_vnode
1906  *
1907  * Description:	Return vnode associated with the file structure, if
1908  *		any.  The lifetime of the returned vnode is bound to
1909  *		the lifetime of the file structure.
1910  *
1911  * Parameters:	fg				Pointer to fileglob to
1912  *						inspect
1913  *
1914  * Returns:	vnode_t
1915  */
1916 vnode_t
fg_get_vnode(struct fileglob * fg)1917 fg_get_vnode(struct fileglob *fg)
1918 {
1919 	if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1920 		return (vnode_t)fg_get_data(fg);
1921 	} else {
1922 		return NULL;
1923 	}
1924 }
1925 
1926 
1927 /*
1928  * fp_getfvp
1929  *
1930  * Description:	Get fileproc and vnode pointer for a given fd from the per
1931  *		process open file table of the specified process, and if
1932  *		successful, increment the fp_iocount
1933  *
1934  * Parameters:	p				Process in which fd lives
1935  *		fd				fd to get information for
1936  *		resultfp			Pointer to result fileproc
1937  *						pointer area, or 0 if none
1938  *		resultvp			Pointer to result vnode pointer
1939  *						area, or 0 if none
1940  *
1941  * Returns:	0				Success
1942  *		EBADF				Bad file descriptor
1943  *		ENOTSUP				fd does not refer to a vnode
1944  *
1945  * Implicit returns:
1946  *		*resultfp (modified)		Fileproc pointer
1947  *		*resultvp (modified)		vnode pointer
1948  *
1949  * Notes:	The resultfp and resultvp fields are optional, and may be
1950  *		independently specified as NULL to skip returning information
1951  *
1952  * Locks:	Internally takes and releases proc_fdlock
1953  */
1954 int
fp_getfvp(proc_t p,int fd,struct fileproc ** resultfp,struct vnode ** resultvp)1955 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1956 {
1957 	struct fileproc *fp;
1958 	int error;
1959 
1960 	error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1961 	if (error == 0) {
1962 		if (resultfp) {
1963 			*resultfp = fp;
1964 		}
1965 		if (resultvp) {
1966 			*resultvp = (struct vnode *)fp_get_data(fp);
1967 		}
1968 	}
1969 
1970 	return error;
1971 }
1972 
1973 
1974 /*
1975  * fp_get_pipe_id
1976  *
1977  * Description:	Get pipe id for a given fd from the per process open file table
1978  *		of the specified process.
1979  *
1980  * Parameters:	p				Process in which fd lives
1981  *		fd				fd to get information for
1982  *		result_pipe_id			Pointer to result pipe id
1983  *
1984  * Returns:	0				Success
1985  *		EIVAL				NULL pointer arguments passed
1986  *		fp_lookup:EBADF			Bad file descriptor
1987  *		ENOTSUP				fd does not refer to a pipe
1988  *
1989  * Implicit returns:
1990  *		*result_pipe_id (modified)	pipe id
1991  *
1992  * Locks:	Internally takes and releases proc_fdlock
1993  */
1994 int
fp_get_pipe_id(proc_t p,int fd,uint64_t * result_pipe_id)1995 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1996 {
1997 	struct fileproc *fp = FILEPROC_NULL;
1998 	struct fileglob *fg = NULL;
1999 	int error = 0;
2000 
2001 	if (p == NULL || result_pipe_id == NULL) {
2002 		return EINVAL;
2003 	}
2004 
2005 	proc_fdlock(p);
2006 	if ((error = fp_lookup(p, fd, &fp, 1))) {
2007 		proc_fdunlock(p);
2008 		return error;
2009 	}
2010 	fg = fp->fp_glob;
2011 
2012 	if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
2013 		*result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
2014 	} else {
2015 		error = ENOTSUP;
2016 	}
2017 
2018 	fp_drop(p, fd, fp, 1);
2019 	proc_fdunlock(p);
2020 	return error;
2021 }
2022 
2023 
2024 /*
2025  * file_vnode
2026  *
2027  * Description:	Given an fd, look it up in the current process's per process
2028  *		open file table, and return its internal vnode pointer.
2029  *
2030  * Parameters:	fd				fd to obtain vnode from
2031  *		vpp				pointer to vnode return area
2032  *
2033  * Returns:	0				Success
2034  *		EINVAL				The fd does not refer to a
2035  *						vnode fileproc entry
2036  *	fp_lookup:EBADF				Bad file descriptor
2037  *
2038  * Implicit returns:
2039  *		*vpp (modified)			Returned vnode pointer
2040  *
2041  * Locks:	This function internally takes and drops the proc_fdlock for
2042  *		the current process
2043  *
2044  * Notes:	If successful, this function increments the fp_iocount on the
2045  *		fd's corresponding fileproc.
2046  *
2047  *		The fileproc referenced is not returned; because of this, care
2048  *		must be taken to not drop the last reference (e.g. by closing
2049  *		the file).  This is inherently unsafe, since the reference may
2050  *		not be recoverable from the vnode, if there is a subsequent
2051  *		close that destroys the associate fileproc.  The caller should
2052  *		therefore retain their own reference on the fileproc so that
2053  *		the fp_iocount can be dropped subsequently.  Failure to do this
2054  *		can result in the returned pointer immediately becoming invalid
2055  *		following the call.
2056  *
2057  *		Use of this function is discouraged.
2058  */
2059 int
file_vnode(int fd,struct vnode ** vpp)2060 file_vnode(int fd, struct vnode **vpp)
2061 {
2062 	return file_vnode_withvid(fd, vpp, NULL);
2063 }
2064 
2065 
2066 /*
2067  * file_vnode_withvid
2068  *
2069  * Description:	Given an fd, look it up in the current process's per process
2070  *		open file table, and return its internal vnode pointer.
2071  *
2072  * Parameters:	fd				fd to obtain vnode from
2073  *		vpp				pointer to vnode return area
2074  *		vidp				pointer to vid of the returned vnode
2075  *
2076  * Returns:	0				Success
2077  *		EINVAL				The fd does not refer to a
2078  *						vnode fileproc entry
2079  *	fp_lookup:EBADF				Bad file descriptor
2080  *
2081  * Implicit returns:
2082  *		*vpp (modified)			Returned vnode pointer
2083  *
2084  * Locks:	This function internally takes and drops the proc_fdlock for
2085  *		the current process
2086  *
2087  * Notes:	If successful, this function increments the fp_iocount on the
2088  *		fd's corresponding fileproc.
2089  *
2090  *		The fileproc referenced is not returned; because of this, care
2091  *		must be taken to not drop the last reference (e.g. by closing
2092  *		the file).  This is inherently unsafe, since the reference may
2093  *		not be recoverable from the vnode, if there is a subsequent
2094  *		close that destroys the associate fileproc.  The caller should
2095  *		therefore retain their own reference on the fileproc so that
2096  *		the fp_iocount can be dropped subsequently.  Failure to do this
2097  *		can result in the returned pointer immediately becoming invalid
2098  *		following the call.
2099  *
2100  *		Use of this function is discouraged.
2101  */
2102 int
file_vnode_withvid(int fd,struct vnode ** vpp,uint32_t * vidp)2103 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2104 {
2105 	struct fileproc *fp;
2106 	int error;
2107 
2108 	error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2109 	if (error == 0) {
2110 		if (vpp) {
2111 			*vpp = (struct vnode *)fp_get_data(fp);
2112 		}
2113 		if (vidp) {
2114 			*vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2115 		}
2116 	}
2117 	return error;
2118 }
2119 
2120 /*
2121  * file_socket
2122  *
2123  * Description:	Given an fd, look it up in the current process's per process
2124  *		open file table, and return its internal socket pointer.
2125  *
2126  * Parameters:	fd				fd to obtain vnode from
2127  *		sp				pointer to socket return area
2128  *
2129  * Returns:	0				Success
2130  *		ENOTSOCK			Not a socket
2131  *		fp_lookup:EBADF			Bad file descriptor
2132  *
2133  * Implicit returns:
2134  *		*sp (modified)			Returned socket pointer
2135  *
2136  * Locks:	This function internally takes and drops the proc_fdlock for
2137  *		the current process
2138  *
2139  * Notes:	If successful, this function increments the fp_iocount on the
2140  *		fd's corresponding fileproc.
2141  *
2142  *		The fileproc referenced is not returned; because of this, care
2143  *		must be taken to not drop the last reference (e.g. by closing
2144  *		the file).  This is inherently unsafe, since the reference may
2145  *		not be recoverable from the socket, if there is a subsequent
2146  *		close that destroys the associate fileproc.  The caller should
2147  *		therefore retain their own reference on the fileproc so that
2148  *		the fp_iocount can be dropped subsequently.  Failure to do this
2149  *		can result in the returned pointer immediately becoming invalid
2150  *		following the call.
2151  *
2152  *		Use of this function is discouraged.
2153  */
2154 int
file_socket(int fd,struct socket ** sp)2155 file_socket(int fd, struct socket **sp)
2156 {
2157 	struct fileproc *fp;
2158 	int error;
2159 
2160 	error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2161 	if (error == 0) {
2162 		if (sp) {
2163 			*sp = (struct socket *)fp_get_data(fp);
2164 		}
2165 	}
2166 	return error;
2167 }
2168 
2169 
2170 /*
2171  * file_flags
2172  *
2173  * Description:	Given an fd, look it up in the current process's per process
2174  *		open file table, and return its fileproc's flags field.
2175  *
2176  * Parameters:	fd				fd whose flags are to be
2177  *						retrieved
2178  *		flags				pointer to flags data area
2179  *
2180  * Returns:	0				Success
2181  *		ENOTSOCK			Not a socket
2182  *		fp_lookup:EBADF			Bad file descriptor
2183  *
2184  * Implicit returns:
2185  *		*flags (modified)		Returned flags field
2186  *
2187  * Locks:	This function internally takes and drops the proc_fdlock for
2188  *		the current process
2189  */
2190 int
file_flags(int fd,int * flags)2191 file_flags(int fd, int *flags)
2192 {
2193 	proc_t p = current_proc();
2194 	struct fileproc *fp;
2195 	int error = EBADF;
2196 
2197 	proc_fdlock_spin(p);
2198 	fp = fp_get_noref_locked(p, fd);
2199 	if (fp) {
2200 		*flags = (int)fp->f_flag;
2201 		error = 0;
2202 	}
2203 	proc_fdunlock(p);
2204 
2205 	return error;
2206 }
2207 
2208 
2209 /*
2210  * file_drop
2211  *
2212  * Description:	Drop an iocount reference on an fd, and wake up any waiters
2213  *		for draining (i.e. blocked in fileproc_drain() called during
2214  *		the last attempt to close a file).
2215  *
2216  * Parameters:	fd				fd on which an ioreference is
2217  *						to be dropped
2218  *
2219  * Returns:	0				Success
2220  *
2221  * Description:	Given an fd, look it up in the current process's per process
2222  *		open file table, and drop it's fileproc's fp_iocount by one
2223  *
2224  * Notes:	This is intended as a corresponding operation to the functions
2225  *		file_vnode() and file_socket() operations.
2226  *
2227  *		If the caller can't possibly hold an I/O reference,
2228  *		this function will panic the kernel rather than allowing
2229  *		for memory corruption. Callers should always call this
2230  *		because they acquired an I/O reference on this file before.
2231  *
2232  *		Use of this function is discouraged.
2233  */
2234 int
file_drop(int fd)2235 file_drop(int fd)
2236 {
2237 	struct fileproc *fp;
2238 	proc_t p = current_proc();
2239 	struct filedesc *fdp = &p->p_fd;
2240 	int     needwakeup = 0;
2241 
2242 	proc_fdlock_spin(p);
2243 	fp = fp_get_noref_locked_with_iocount(p, fd);
2244 
2245 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
2246 		if (fp->fp_flags & FP_SELCONFLICT) {
2247 			fp->fp_flags &= ~FP_SELCONFLICT;
2248 		}
2249 
2250 		if (fdp->fd_fpdrainwait) {
2251 			fdp->fd_fpdrainwait = 0;
2252 			needwakeup = 1;
2253 		}
2254 	}
2255 	proc_fdunlock(p);
2256 
2257 	if (needwakeup) {
2258 		wakeup(&fdp->fd_fpdrainwait);
2259 	}
2260 	return 0;
2261 }
2262 
2263 
2264 #pragma mark syscalls
2265 
/*
 * Fallback definitions for filesystem-specific selectors.  Each one is
 * only defined here when nothing included earlier has already supplied it.
 */
#if !defined(HFS_GET_BOOT_INFO)
#define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif /* !HFS_GET_BOOT_INFO */

#if !defined(HFS_SET_BOOT_INFO)
#define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif /* !HFS_SET_BOOT_INFO */

#if !defined(APFSIOC_REVERT_TO_SNAPSHOT)
#define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
#endif /* !APFSIOC_REVERT_TO_SNAPSHOT */
2277 
/*
 * CHECK_ADD_OVERFLOW_INT64L
 *
 * Evaluates to 1 if adding the signed 64-bit values x and y would leave the
 * range [LLONG_MIN, LLONG_MAX], and to 0 otherwise.  The sum itself is never
 * computed: overflow is only possible when both operands have the same
 * sign, so each case compares x against the headroom left by y.
 *
 * Both arguments are expanded more than once and must be free of side
 * effects.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
	(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y))))
2282 
2283 /*
2284  * sys_getdtablesize
2285  *
2286  * Description:	Returns the per process maximum size of the descriptor table
2287  *
2288  * Parameters:	p				Process being queried
2289  *		retval				Pointer to the call return area
2290  *
2291  * Returns:	0				Success
2292  *
2293  * Implicit returns:
2294  *		*retval (modified)		Size of dtable
2295  */
2296 int
sys_getdtablesize(proc_t p,__unused struct getdtablesize_args * uap,int32_t * retval)2297 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2298 {
2299 	*retval = proc_limitgetcur_nofile(p);
2300 	return 0;
2301 }
2302 
2303 
/*
 * check_file_seek_range
 *
 * Description: Checks that the byte range described by a flock structure
 *		lies within 0 .. LLONG_MAX once l_whence has been applied.
 *		Only SEEK_CUR and SEEK_SET are examined; any other l_whence
 *		is accepted without checks.
 *
 * Parameters:  fl		Flock structure.
 *		cur_file_offset	Current offset in the file (used for
 *				SEEK_CUR only).
 *
 * Returns:     0               on Success.
 *		EOVERFLOW	on overflow.
 *		EINVAL          on offset less than zero.
 */

static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	off_t start;

	/* Work out the absolute start of the range. */
	switch (fl->l_whence) {
	case SEEK_CUR:
		if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
			/* Wrapped below LLONG_MIN: negative; above LLONG_MAX: too big. */
			return (fl->l_start < 0) ? EINVAL : EOVERFLOW;
		}
		start = fl->l_start + cur_file_offset;
		break;
	case SEEK_SET:
		start = fl->l_start;
		break;
	default:
		return 0;
	}

	/* The start marker must not be negative. */
	if (start < 0) {
		return EINVAL;
	}

	if (fl->l_len > 0) {
		/* Last byte is start + l_len - 1; it must not pass LLONG_MAX. */
		if (CHECK_ADD_OVERFLOW_INT64L(start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
	} else if (start + fl->l_len < 0) {
		/*
		 * Non-positive length: the end marker must not be negative.
		 * start >= 0 and l_len <= 0, so this sum cannot overflow.
		 */
		return EINVAL;
	}

	return 0;
}
2360 
2361 
2362 /*
2363  * sys_dup
2364  *
2365  * Description:	Duplicate a file descriptor.
2366  *
2367  * Parameters:	p				Process performing the dup
2368  *		uap->fd				The fd to dup
2369  *		retval				Pointer to the call return area
2370  *
2371  * Returns:	0				Success
2372  *		!0				Errno
2373  *
2374  * Implicit returns:
2375  *		*retval (modified)		The new descriptor
2376  */
2377 int
sys_dup(proc_t p,struct dup_args * uap,int32_t * retval)2378 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
2379 {
2380 	struct filedesc *fdp = &p->p_fd;
2381 	int old = uap->fd;
2382 	int new, error;
2383 	struct fileproc *fp;
2384 
2385 	proc_fdlock(p);
2386 	if ((error = fp_lookup(p, old, &fp, 1))) {
2387 		proc_fdunlock(p);
2388 		return error;
2389 	}
2390 	if (fp_isguarded(fp, GUARD_DUP)) {
2391 		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2392 		(void) fp_drop(p, old, fp, 1);
2393 		proc_fdunlock(p);
2394 		return error;
2395 	}
2396 	if ((error = fdalloc(p, 0, &new))) {
2397 		fp_drop(p, old, fp, 1);
2398 		proc_fdunlock(p);
2399 		return error;
2400 	}
2401 	error = finishdup(p, fdp, old, new, 0, retval);
2402 
2403 	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
2404 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
2405 		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
2406 	}
2407 
2408 	fp_drop(p, old, fp, 1);
2409 	proc_fdunlock(p);
2410 
2411 	return error;
2412 }
2413 
2414 /*
2415  * sys_dup2
2416  *
2417  * Description:	Duplicate a file descriptor to a particular value.
2418  *
2419  * Parameters:	p				Process performing the dup
2420  *		uap->from			The fd to dup
2421  *		uap->to				The fd to dup it to
2422  *		retval				Pointer to the call return area
2423  *
2424  * Returns:	0				Success
2425  *		!0				Errno
2426  *
2427  * Implicit returns:
2428  *		*retval (modified)		The new descriptor
2429  */
2430 int
sys_dup2(proc_t p,struct dup2_args * uap,int32_t * retval)2431 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2432 {
2433 	return dup2(p, uap->from, uap->to, retval);
2434 }
2435 
2436 int
dup2(proc_t p,int old,int new,int * retval)2437 dup2(proc_t p, int old, int new, int *retval)
2438 {
2439 	struct filedesc *fdp = &p->p_fd;
2440 	struct fileproc *fp, *nfp;
2441 	int i, error;
2442 
2443 	proc_fdlock(p);
2444 
2445 startover:
2446 	if ((error = fp_lookup(p, old, &fp, 1))) {
2447 		proc_fdunlock(p);
2448 		return error;
2449 	}
2450 	if (fp_isguarded(fp, GUARD_DUP)) {
2451 		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2452 		(void) fp_drop(p, old, fp, 1);
2453 		proc_fdunlock(p);
2454 		return error;
2455 	}
2456 	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
2457 		fp_drop(p, old, fp, 1);
2458 		proc_fdunlock(p);
2459 		return EBADF;
2460 	}
2461 	if (old == new) {
2462 		fp_drop(p, old, fp, 1);
2463 		*retval = new;
2464 		proc_fdunlock(p);
2465 		return 0;
2466 	}
2467 	if (new < 0 || new >= fdp->fd_nfiles) {
2468 		if ((error = fdalloc(p, new, &i))) {
2469 			fp_drop(p, old, fp, 1);
2470 			proc_fdunlock(p);
2471 			return error;
2472 		}
2473 		if (new != i) {
2474 			fdrelse(p, i);
2475 			goto closeit;
2476 		}
2477 	} else {
2478 closeit:
2479 		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
2480 			fp_drop(p, old, fp, 1);
2481 			procfdtbl_waitfd(p, new);
2482 #if DIAGNOSTIC
2483 			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2484 #endif
2485 			goto startover;
2486 		}
2487 
2488 		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
2489 			if (fp_isguarded(nfp, GUARD_CLOSE)) {
2490 				fp_drop(p, old, fp, 1);
2491 				error = fp_guard_exception(p,
2492 				    new, nfp, kGUARD_EXC_CLOSE);
2493 				proc_fdunlock(p);
2494 				return error;
2495 			}
2496 			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
2497 			proc_fdlock(p);
2498 			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
2499 		} else {
2500 #if DIAGNOSTIC
2501 			if (fdp->fd_ofiles[new] != NULL) {
2502 				panic("dup2: no ref on fileproc %d", new);
2503 			}
2504 #endif
2505 			procfdtbl_reservefd(p, new);
2506 		}
2507 	}
2508 #if DIAGNOSTIC
2509 	if (fdp->fd_ofiles[new] != 0) {
2510 		panic("dup2: overwriting fd_ofiles with new %d", new);
2511 	}
2512 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
2513 		panic("dup2: unreserved fileflags with new %d", new);
2514 	}
2515 #endif
2516 	error = finishdup(p, fdp, old, new, 0, retval);
2517 	fp_drop(p, old, fp, 1);
2518 	proc_fdunlock(p);
2519 
2520 	return error;
2521 }
2522 
2523 
2524 /*
2525  * fcntl
2526  *
2527  * Description:	The file control system call.
2528  *
2529  * Parameters:	p				Process performing the fcntl
2530  *		uap->fd				The fd to operate against
2531  *		uap->cmd			The command to perform
2532  *		uap->arg			Pointer to the command argument
2533  *		retval				Pointer to the call return area
2534  *
2535  * Returns:	0				Success
2536  *		!0				Errno (see fcntl_nocancel)
2537  *
2538  * Implicit returns:
2539  *		*retval (modified)		fcntl return value (if any)
2540  *
2541  * Notes:	This system call differs from fcntl_nocancel() in that it
2542  *		tests for cancellation prior to performing a potentially
2543  *		blocking operation.
2544  */
2545 int
sys_fcntl(proc_t p,struct fcntl_args * uap,int32_t * retval)2546 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
2547 {
2548 	__pthread_testcancel(1);
2549 	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
2550 }
2551 
/* Entitlement checked by sys_fcntl__OPENFROM() to decide which credentials the open uses. */
#define ACCOUNT_OPENFROM_ENTITLEMENT "com.apple.private.vfs.role-account-openfrom"
2554 
2555 /*
2556  * sys_fcntl_nocancel
2557  *
2558  * Description:	A non-cancel-testing file control system call.
2559  *
2560  * Parameters:	p				Process performing the fcntl
2561  *		uap->fd				The fd to operate against
2562  *		uap->cmd			The command to perform
2563  *		uap->arg			Pointer to the command argument
2564  *		retval				Pointer to the call return area
2565  *
2566  * Returns:	0				Success
2567  *		EINVAL
2568  *	fp_lookup:EBADF				Bad file descriptor
2569  * [F_DUPFD]
2570  *	fdalloc:EMFILE
2571  *	fdalloc:ENOMEM
2572  *	finishdup:EBADF
2573  *	finishdup:ENOMEM
2574  * [F_SETOWN]
2575  *		ESRCH
2576  * [F_SETLK]
2577  *		EBADF
2578  *		EOVERFLOW
2579  *	copyin:EFAULT
2580  *	vnode_getwithref:???
2581  *	VNOP_ADVLOCK:???
2582  *	msleep:ETIMEDOUT
2583  * [F_GETLK]
2584  *		EBADF
2585  *		EOVERFLOW
2586  *	copyin:EFAULT
2587  *	copyout:EFAULT
2588  *	vnode_getwithref:???
2589  *	VNOP_ADVLOCK:???
2590  * [F_PREALLOCATE]
2591  *		EBADF
2592  *		EFBIG
2593  *		EINVAL
2594  *		ENOSPC
2595  *	copyin:EFAULT
2596  *	copyout:EFAULT
2597  *	vnode_getwithref:???
2598  *	VNOP_ALLOCATE:???
2599  * [F_SETSIZE,F_RDADVISE]
2600  *		EBADF
2601  *		EINVAL
2602  *	copyin:EFAULT
2603  *	vnode_getwithref:???
2604  * [F_RDAHEAD,F_NOCACHE]
2605  *		EBADF
2606  *	vnode_getwithref:???
2607  * [???]
2608  *
2609  * Implicit returns:
2610  *		*retval (modified)		fcntl return value (if any)
2611  */
/*
 * Declares a local struct vfs_context named `context' for the current
 * thread, carrying the credential of the file being operated on.
 *
 * The expansion refers to a variable named `fp' that must already be in
 * scope (and valid) at the point of use.
 */
#define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
	struct vfs_context context = { .vc_thread = current_thread(), .vc_ucred = fp->f_cred }
2617 
2618 static user_addr_t
sys_fnctl_parse_arg(proc_t p,user_long_t arg)2619 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2620 {
2621 	/*
2622 	 * Since the arg parameter is defined as a long but may be
2623 	 * either a long or a pointer we must take care to handle
2624 	 * sign extension issues.  Our sys call munger will sign
2625 	 * extend a long when we are called from a 32-bit process.
2626 	 * Since we can never have an address greater than 32-bits
2627 	 * from a 32-bit process we lop off the top 32-bits to avoid
2628 	 * getting the wrong address
2629 	 */
2630 	return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2631 }
2632 
2633 /* cleanup code common to fnctl functions, for when the fdlock is still held */
2634 static int
sys_fcntl_out(proc_t p,int fd,struct fileproc * fp,int error)2635 sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
2636 {
2637 	fp_drop(p, fd, fp, 1);
2638 	proc_fdunlock(p);
2639 	return error;
2640 }
2641 
2642 /* cleanup code common to fnctl acting on vnodes, once they unlocked the fdlock */
2643 static int
sys_fcntl_outdrop(proc_t p,int fd,struct fileproc * fp,struct vnode * vp,int error)2644 sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
2645 {
2646 #pragma unused(vp)
2647 
2648 	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2649 	fp_drop(p, fd, fp, 0);
2650 	return error;
2651 }
2652 
2653 typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
2654     struct fileproc *fp, int32_t *retval);
2655 
2656 typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
2657     user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2658 
2659 /*
2660  * SPI (private) for opening a file starting from a dir fd
2661  *
2662  * Note: do not inline to keep stack usage under control.
2663  */
2664 __attribute__((noinline))
2665 static int
sys_fcntl__OPENFROM(proc_t p,int fd,int cmd,user_long_t arg,struct fileproc * fp,struct vnode * vp,int32_t * retval)2666 sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
2667     struct fileproc *fp, struct vnode *vp, int32_t *retval)
2668 {
2669 #pragma unused(cmd)
2670 
2671 	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
2672 	struct user_fopenfrom fopen;
2673 	struct vnode_attr *va;
2674 	struct nameidata *nd;
2675 	int error, cmode;
2676 	bool has_entitlement;
2677 
2678 	/* Check if this isn't a valid file descriptor */
2679 	if ((fp->f_flag & FREAD) == 0) {
2680 		return sys_fcntl_out(p, fd, fp, EBADF);
2681 	}
2682 	proc_fdunlock(p);
2683 
2684 	if (vnode_getwithref(vp)) {
2685 		error = ENOENT;
2686 		goto outdrop;
2687 	}
2688 
2689 	/* Only valid for directories */
2690 	if (vp->v_type != VDIR) {
2691 		vnode_put(vp);
2692 		error = ENOTDIR;
2693 		goto outdrop;
2694 	}
2695 
2696 	/*
2697 	 * Only entitled apps may use the credentials of the thread
2698 	 * that opened the file descriptor.
2699 	 * Non-entitled threads will use their own context.
2700 	 */
2701 	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);
2702 
2703 	/* Get flags, mode and pathname arguments. */
2704 	if (IS_64BIT_PROCESS(p)) {
2705 		error = copyin(argp, &fopen, sizeof(fopen));
2706 	} else {
2707 		struct user32_fopenfrom fopen32;
2708 
2709 		error = copyin(argp, &fopen32, sizeof(fopen32));
2710 		fopen.o_flags = fopen32.o_flags;
2711 		fopen.o_mode = fopen32.o_mode;
2712 		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2713 	}
2714 	if (error) {
2715 		vnode_put(vp);
2716 		goto outdrop;
2717 	}
2718 
2719 	/* open1() can have really deep stacks, so allocate those */
2720 	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2721 	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2722 
2723 	AUDIT_ARG(fflags, fopen.o_flags);
2724 	AUDIT_ARG(mode, fopen.o_mode);
2725 	VATTR_INIT(va);
2726 	/* Mask off all but regular access permissions */
2727 	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
2728 	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);
2729 
2730 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2731 
2732 	/* Start the lookup relative to the file descriptor's vnode. */
2733 	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2734 	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2735 	nd->ni_dvp = vp;
2736 
2737 	error = open1(has_entitlement ? &context : vfs_context_current(),
2738 	    nd, fopen.o_flags, va, NULL, NULL, retval, AUTH_OPEN_NOAUTHFD);
2739 
2740 	kfree_type(struct vnode_attr, va);
2741 	kfree_type(struct nameidata, nd);
2742 
2743 	vnode_put(vp);
2744 
2745 outdrop:
2746 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
2747 }
2748 
2749 int
sys_fcntl_nocancel(proc_t p,struct fcntl_nocancel_args * uap,int32_t * retval)2750 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2751 {
2752 	int fd = uap->fd;
2753 	int cmd = uap->cmd;
2754 	struct filedesc *fdp = &p->p_fd;
2755 	struct fileproc *fp;
2756 	struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
2757 	unsigned int oflags, nflags;
2758 	int i, tmp, error, error2, flg = 0;
2759 	struct flock fl = {};
2760 	struct flocktimeout fltimeout;
2761 	struct timespec *timeout = NULL;
2762 	off_t offset;
2763 	int newmin;
2764 	daddr64_t lbn, bn;
2765 	unsigned int fflag;
2766 	user_addr_t argp;
2767 	boolean_t is64bit;
2768 	int has_entitlement = 0;
2769 
2770 	AUDIT_ARG(fd, uap->fd);
2771 	AUDIT_ARG(cmd, uap->cmd);
2772 
2773 	proc_fdlock(p);
2774 	if ((error = fp_lookup(p, fd, &fp, 1))) {
2775 		proc_fdunlock(p);
2776 		return error;
2777 	}
2778 
2779 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2780 
2781 	is64bit = proc_is64bit(p);
2782 	if (is64bit) {
2783 		argp = uap->arg;
2784 	} else {
2785 		/*
2786 		 * Since the arg parameter is defined as a long but may be
2787 		 * either a long or a pointer we must take care to handle
2788 		 * sign extension issues.  Our sys call munger will sign
2789 		 * extend a long when we are called from a 32-bit process.
2790 		 * Since we can never have an address greater than 32-bits
2791 		 * from a 32-bit process we lop off the top 32-bits to avoid
2792 		 * getting the wrong address
2793 		 */
2794 		argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2795 	}
2796 
2797 #if CONFIG_MACF
2798 	error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2799 	if (error) {
2800 		goto out;
2801 	}
2802 #endif
2803 
2804 	switch (cmd) {
2805 	case F_DUPFD:
2806 	case F_DUPFD_CLOEXEC:
2807 		if (fp_isguarded(fp, GUARD_DUP)) {
2808 			error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2809 			goto out;
2810 		}
2811 		newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2812 		AUDIT_ARG(value32, newmin);
2813 		if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2814 			error = EINVAL;
2815 			goto out;
2816 		}
2817 		if ((error = fdalloc(p, newmin, &i))) {
2818 			goto out;
2819 		}
2820 		error = finishdup(p, fdp, fd, i,
2821 		    cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2822 		goto out;
2823 
2824 	case F_GETFD:
2825 		*retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2826 		error = 0;
2827 		goto out;
2828 
2829 	case F_SETFD:
2830 		AUDIT_ARG(value32, (uint32_t)uap->arg);
2831 		if (uap->arg & FD_CLOEXEC) {
2832 			fp->fp_flags |= FP_CLOEXEC;
2833 			error = 0;
2834 		} else if (!fp->fp_guard_attrs) {
2835 			fp->fp_flags &= ~FP_CLOEXEC;
2836 			error = 0;
2837 		} else {
2838 			error = fp_guard_exception(p,
2839 			    fd, fp, kGUARD_EXC_NOCLOEXEC);
2840 		}
2841 		goto out;
2842 
2843 	case F_GETFL:
2844 		fflag = fp->f_flag;
2845 		if ((fflag & O_EVTONLY) && proc_disallow_rw_for_o_evtonly(p)) {
2846 			/*
2847 			 * We insert back F_READ so that conversion back to open flags with
2848 			 * OFLAGS() will come out right. We only need to set 'FREAD' as the
2849 			 * 'O_RDONLY' is always implied.
2850 			 */
2851 			fflag |= FREAD;
2852 		}
2853 		*retval = OFLAGS(fflag);
2854 		error = 0;
2855 		goto out;
2856 
2857 	case F_SETFL:
2858 		// FIXME (rdar://54898652)
2859 		//
2860 		// this code is broken if fnctl(F_SETFL), ioctl() are
2861 		// called concurrently for the same fileglob.
2862 
2863 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2864 		AUDIT_ARG(value32, tmp);
2865 
2866 		os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2867 			nflags  = oflags & ~FCNTLFLAGS;
2868 			nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2869 		});
2870 		tmp = nflags & FNONBLOCK;
2871 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2872 		if (error) {
2873 			goto out;
2874 		}
2875 		tmp = nflags & FASYNC;
2876 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2877 		if (!error) {
2878 			goto out;
2879 		}
2880 		os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2881 		tmp = 0;
2882 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2883 		goto out;
2884 
2885 	case F_GETOWN:
2886 		if (fp->f_type == DTYPE_SOCKET) {
2887 			*retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2888 			error = 0;
2889 			goto out;
2890 		}
2891 		error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2892 		*retval = -*retval;
2893 		goto out;
2894 
2895 	case F_SETOWN:
2896 		tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2897 		AUDIT_ARG(value32, tmp);
2898 		if (fp->f_type == DTYPE_SOCKET) {
2899 			((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2900 			error = 0;
2901 			goto out;
2902 		}
2903 		if (fp->f_type == DTYPE_PIPE) {
2904 			error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2905 			goto out;
2906 		}
2907 
2908 		if (tmp <= 0) {
2909 			tmp = -tmp;
2910 		} else {
2911 			proc_t p1 = proc_find(tmp);
2912 			if (p1 == 0) {
2913 				error = ESRCH;
2914 				goto out;
2915 			}
2916 			tmp = (int)p1->p_pgrpid;
2917 			proc_rele(p1);
2918 		}
2919 		error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2920 		goto out;
2921 
2922 	case F_SETNOSIGPIPE:
2923 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2924 		if (fp->f_type == DTYPE_SOCKET) {
2925 #if SOCKETS
2926 			error = sock_setsockopt((struct socket *)fp_get_data(fp),
2927 			    SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2928 #else
2929 			error = EINVAL;
2930 #endif
2931 		} else {
2932 			struct fileglob *fg = fp->fp_glob;
2933 
2934 			lck_mtx_lock_spin(&fg->fg_lock);
2935 			if (tmp) {
2936 				fg->fg_lflags |= FG_NOSIGPIPE;
2937 			} else {
2938 				fg->fg_lflags &= ~FG_NOSIGPIPE;
2939 			}
2940 			lck_mtx_unlock(&fg->fg_lock);
2941 			error = 0;
2942 		}
2943 		goto out;
2944 
2945 	case F_GETNOSIGPIPE:
2946 		if (fp->f_type == DTYPE_SOCKET) {
2947 #if SOCKETS
2948 			int retsize = sizeof(*retval);
2949 			error = sock_getsockopt((struct socket *)fp_get_data(fp),
2950 			    SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2951 #else
2952 			error = EINVAL;
2953 #endif
2954 		} else {
2955 			*retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2956 			    1 : 0;
2957 			error = 0;
2958 		}
2959 		goto out;
2960 
2961 	case F_SETCONFINED:
2962 		/*
2963 		 * If this is the only reference to this fglob in the process
2964 		 * and it's already marked as close-on-fork then mark it as
2965 		 * (immutably) "confined" i.e. any fd that points to it will
2966 		 * forever be close-on-fork, and attempts to use an IPC
2967 		 * mechanism to move the descriptor elsewhere will fail.
2968 		 */
2969 		if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2970 			struct fileglob *fg = fp->fp_glob;
2971 
2972 			lck_mtx_lock_spin(&fg->fg_lock);
2973 			if (fg->fg_lflags & FG_CONFINED) {
2974 				error = 0;
2975 			} else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2976 				error = EAGAIN; /* go close the dup .. */
2977 			} else if (fp->fp_flags & FP_CLOFORK) {
2978 				fg->fg_lflags |= FG_CONFINED;
2979 				error = 0;
2980 			} else {
2981 				error = EBADF;  /* open without O_CLOFORK? */
2982 			}
2983 			lck_mtx_unlock(&fg->fg_lock);
2984 		} else {
2985 			/*
2986 			 * Other subsystems may have built on the immutability
2987 			 * of FG_CONFINED; clearing it may be tricky.
2988 			 */
2989 			error = EPERM;          /* immutable */
2990 		}
2991 		goto out;
2992 
2993 	case F_GETCONFINED:
2994 		*retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2995 		error = 0;
2996 		goto out;
2997 
2998 	case F_SETLKWTIMEOUT:
2999 	case F_SETLKW:
3000 	case F_OFD_SETLKWTIMEOUT:
3001 	case F_OFD_SETLKW:
3002 		flg |= F_WAIT;
3003 		OS_FALLTHROUGH;
3004 
3005 	case F_SETLK:
3006 	case F_OFD_SETLK:
3007 		if (fp->f_type != DTYPE_VNODE) {
3008 			error = EBADF;
3009 			goto out;
3010 		}
3011 		vp = (struct vnode *)fp_get_data(fp);
3012 
3013 		fflag = fp->f_flag;
3014 		offset = fp->f_offset;
3015 		proc_fdunlock(p);
3016 
3017 		/* Copy in the lock structure */
3018 		if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3019 			error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3020 			if (error) {
3021 				goto outdrop;
3022 			}
3023 			fl = fltimeout.fl;
3024 			timeout = &fltimeout.timeout;
3025 		} else {
3026 			error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3027 			if (error) {
3028 				goto outdrop;
3029 			}
3030 		}
3031 
3032 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3033 		/* and ending byte for EOVERFLOW in SEEK_SET */
3034 		error = check_file_seek_range(&fl, offset);
3035 		if (error) {
3036 			goto outdrop;
3037 		}
3038 
3039 		if ((error = vnode_getwithref(vp))) {
3040 			goto outdrop;
3041 		}
3042 		if (fl.l_whence == SEEK_CUR) {
3043 			fl.l_start += offset;
3044 		}
3045 
3046 #if CONFIG_MACF
3047 		error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3048 		    F_SETLK, &fl);
3049 		if (error) {
3050 			(void)vnode_put(vp);
3051 			goto outdrop;
3052 		}
3053 #endif
3054 
3055 #if CONFIG_FILE_LEASES
3056 		(void)vnode_breaklease(vp, O_WRONLY, vfs_context_current());
3057 #endif
3058 
3059 		switch (cmd) {
3060 		case F_OFD_SETLK:
3061 		case F_OFD_SETLKW:
3062 		case F_OFD_SETLKWTIMEOUT:
3063 			flg |= F_OFD_LOCK;
3064 			switch (fl.l_type) {
3065 			case F_RDLCK:
3066 				if ((fflag & FREAD) == 0) {
3067 					error = EBADF;
3068 					break;
3069 				}
3070 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3071 				    F_SETLK, &fl, flg, &context, timeout);
3072 				break;
3073 			case F_WRLCK:
3074 				if ((fflag & FWRITE) == 0) {
3075 					error = EBADF;
3076 					break;
3077 				}
3078 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3079 				    F_SETLK, &fl, flg, &context, timeout);
3080 				break;
3081 			case F_UNLCK:
3082 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3083 				    F_UNLCK, &fl, F_OFD_LOCK, &context,
3084 				    timeout);
3085 				break;
3086 			default:
3087 				error = EINVAL;
3088 				break;
3089 			}
3090 			if (0 == error &&
3091 			    (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3092 				struct fileglob *fg = fp->fp_glob;
3093 
3094 				/*
3095 				 * arrange F_UNLCK on last close (once
3096 				 * set, FG_HAS_OFDLOCK is immutable)
3097 				 */
3098 				if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3099 					lck_mtx_lock_spin(&fg->fg_lock);
3100 					fg->fg_lflags |= FG_HAS_OFDLOCK;
3101 					lck_mtx_unlock(&fg->fg_lock);
3102 				}
3103 			}
3104 			break;
3105 		default:
3106 			flg |= F_POSIX;
3107 			switch (fl.l_type) {
3108 			case F_RDLCK:
3109 				if ((fflag & FREAD) == 0) {
3110 					error = EBADF;
3111 					break;
3112 				}
3113 				// XXX UInt32 unsafe for LP64 kernel
3114 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3115 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3116 				    F_SETLK, &fl, flg, &context, timeout);
3117 				break;
3118 			case F_WRLCK:
3119 				if ((fflag & FWRITE) == 0) {
3120 					error = EBADF;
3121 					break;
3122 				}
3123 				// XXX UInt32 unsafe for LP64 kernel
3124 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3125 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3126 				    F_SETLK, &fl, flg, &context, timeout);
3127 				break;
3128 			case F_UNLCK:
3129 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3130 				    F_UNLCK, &fl, F_POSIX, &context, timeout);
3131 				break;
3132 			default:
3133 				error = EINVAL;
3134 				break;
3135 			}
3136 			break;
3137 		}
3138 		(void) vnode_put(vp);
3139 		goto outdrop;
3140 
3141 	case F_GETLK:
3142 	case F_OFD_GETLK:
3143 	case F_GETLKPID:
3144 	case F_OFD_GETLKPID:
3145 		if (fp->f_type != DTYPE_VNODE) {
3146 			error = EBADF;
3147 			goto out;
3148 		}
3149 		vp = (struct vnode *)fp_get_data(fp);
3150 
3151 		offset = fp->f_offset;
3152 		proc_fdunlock(p);
3153 
3154 		/* Copy in the lock structure */
3155 		error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3156 		if (error) {
3157 			goto outdrop;
3158 		}
3159 
3160 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3161 		/* and ending byte for EOVERFLOW in SEEK_SET */
3162 		error = check_file_seek_range(&fl, offset);
3163 		if (error) {
3164 			goto outdrop;
3165 		}
3166 
3167 		if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3168 			error = EINVAL;
3169 			goto outdrop;
3170 		}
3171 
3172 		switch (fl.l_type) {
3173 		case F_RDLCK:
3174 		case F_UNLCK:
3175 		case F_WRLCK:
3176 			break;
3177 		default:
3178 			error = EINVAL;
3179 			goto outdrop;
3180 		}
3181 
3182 		switch (fl.l_whence) {
3183 		case SEEK_CUR:
3184 		case SEEK_SET:
3185 		case SEEK_END:
3186 			break;
3187 		default:
3188 			error = EINVAL;
3189 			goto outdrop;
3190 		}
3191 
3192 		if ((error = vnode_getwithref(vp)) == 0) {
3193 			if (fl.l_whence == SEEK_CUR) {
3194 				fl.l_start += offset;
3195 			}
3196 
3197 #if CONFIG_MACF
3198 			error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3199 			    cmd, &fl);
3200 			if (error == 0)
3201 #endif
3202 			switch (cmd) {
3203 			case F_OFD_GETLK:
3204 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3205 				    F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3206 				break;
3207 			case F_OFD_GETLKPID:
3208 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3209 				    F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3210 				break;
3211 			default:
3212 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3213 				    cmd, &fl, F_POSIX, &context, NULL);
3214 				break;
3215 			}
3216 
3217 			(void)vnode_put(vp);
3218 
3219 			if (error == 0) {
3220 				error = copyout((caddr_t)&fl, argp, sizeof(fl));
3221 			}
3222 		}
3223 		goto outdrop;
3224 
3225 	case F_PREALLOCATE: {
3226 		fstore_t alloc_struct;    /* structure for allocate command */
3227 		u_int32_t alloc_flags = 0;
3228 
3229 		if (fp->f_type != DTYPE_VNODE) {
3230 			error = EBADF;
3231 			goto out;
3232 		}
3233 
3234 		vp = (struct vnode *)fp_get_data(fp);
3235 		proc_fdunlock(p);
3236 
3237 		/* make sure that we have write permission */
3238 		if ((fp->f_flag & FWRITE) == 0) {
3239 			error = EBADF;
3240 			goto outdrop;
3241 		}
3242 
3243 		error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3244 		if (error) {
3245 			goto outdrop;
3246 		}
3247 
3248 		/* now set the space allocated to 0 */
3249 		alloc_struct.fst_bytesalloc = 0;
3250 
3251 		/*
3252 		 * Do some simple parameter checking
3253 		 */
3254 
3255 		/* set up the flags */
3256 
3257 		alloc_flags |= PREALLOCATE;
3258 
3259 		if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3260 			alloc_flags |= ALLOCATECONTIG;
3261 		}
3262 
3263 		if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3264 			alloc_flags |= ALLOCATEALL;
3265 		}
3266 
3267 		if (alloc_struct.fst_flags & F_ALLOCATEPERSIST) {
3268 			alloc_flags |= ALLOCATEPERSIST;
3269 		}
3270 
3271 		/*
3272 		 * Do any position mode specific stuff.  The only
3273 		 * position mode  supported now is PEOFPOSMODE
3274 		 */
3275 
3276 		switch (alloc_struct.fst_posmode) {
3277 		case F_PEOFPOSMODE:
3278 			if (alloc_struct.fst_offset != 0) {
3279 				error = EINVAL;
3280 				goto outdrop;
3281 			}
3282 
3283 			alloc_flags |= ALLOCATEFROMPEOF;
3284 			break;
3285 
3286 		case F_VOLPOSMODE:
3287 			if (alloc_struct.fst_offset <= 0) {
3288 				error = EINVAL;
3289 				goto outdrop;
3290 			}
3291 
3292 			alloc_flags |= ALLOCATEFROMVOL;
3293 			break;
3294 
3295 		default: {
3296 			error = EINVAL;
3297 			goto outdrop;
3298 		}
3299 		}
3300 		if ((error = vnode_getwithref(vp)) == 0) {
3301 			/*
3302 			 * call allocate to get the space
3303 			 */
3304 			error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3305 			    &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3306 			    &context);
3307 			(void)vnode_put(vp);
3308 
3309 			error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3310 
3311 			if (error == 0) {
3312 				error = error2;
3313 			}
3314 		}
3315 		goto outdrop;
3316 	}
3317 	case F_PUNCHHOLE: {
3318 		fpunchhole_t args;
3319 
3320 		if (fp->f_type != DTYPE_VNODE) {
3321 			error = EBADF;
3322 			goto out;
3323 		}
3324 
3325 		vp = (struct vnode *)fp_get_data(fp);
3326 		proc_fdunlock(p);
3327 
3328 		/* need write permissions */
3329 		if ((fp->f_flag & FWRITE) == 0) {
3330 			error = EPERM;
3331 			goto outdrop;
3332 		}
3333 
3334 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3335 			goto outdrop;
3336 		}
3337 
3338 		if ((error = vnode_getwithref(vp))) {
3339 			goto outdrop;
3340 		}
3341 
3342 #if CONFIG_MACF
3343 		if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
3344 			(void)vnode_put(vp);
3345 			goto outdrop;
3346 		}
3347 #endif
3348 
3349 		error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
3350 		(void)vnode_put(vp);
3351 
3352 		goto outdrop;
3353 	}
3354 	case F_TRIM_ACTIVE_FILE: {
3355 		ftrimactivefile_t args;
3356 
3357 		if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
3358 			error = EACCES;
3359 			goto out;
3360 		}
3361 
3362 		if (fp->f_type != DTYPE_VNODE) {
3363 			error = EBADF;
3364 			goto out;
3365 		}
3366 
3367 		vp = (struct vnode *)fp_get_data(fp);
3368 		proc_fdunlock(p);
3369 
3370 		/* need write permissions */
3371 		if ((fp->f_flag & FWRITE) == 0) {
3372 			error = EPERM;
3373 			goto outdrop;
3374 		}
3375 
3376 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3377 			goto outdrop;
3378 		}
3379 
3380 		if ((error = vnode_getwithref(vp))) {
3381 			goto outdrop;
3382 		}
3383 
3384 		error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
3385 		(void)vnode_put(vp);
3386 
3387 		goto outdrop;
3388 	}
3389 	case F_SPECULATIVE_READ: {
3390 		fspecread_t args;
3391 		off_t temp_length = 0;
3392 
3393 		if (fp->f_type != DTYPE_VNODE) {
3394 			error = EBADF;
3395 			goto out;
3396 		}
3397 
3398 		vp = (struct vnode *)fp_get_data(fp);
3399 		proc_fdunlock(p);
3400 
3401 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3402 			goto outdrop;
3403 		}
3404 
3405 		/* Discard invalid offsets or lengths */
3406 		if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3407 			error = EINVAL;
3408 			goto outdrop;
3409 		}
3410 
3411 		/*
3412 		 * Round the file offset down to a page-size boundary (or to 0).
3413 		 * The filesystem will need to round the length up to the end of the page boundary
3414 		 * or to the EOF of the file.
3415 		 */
3416 		uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3417 		uint64_t foff_delta = args.fsr_offset - foff;
3418 		args.fsr_offset = (off_t) foff;
3419 
3420 		/*
3421 		 * Now add in the delta to the supplied length. Since we may have adjusted the
3422 		 * offset, increase it by the amount that we adjusted.
3423 		 */
3424 		if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3425 			error = EOVERFLOW;
3426 			goto outdrop;
3427 		}
3428 
3429 		/*
3430 		 * Make sure (fsr_offset + fsr_length) does not overflow.
3431 		 */
3432 		if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3433 			error = EOVERFLOW;
3434 			goto outdrop;
3435 		}
3436 
3437 		if ((error = vnode_getwithref(vp))) {
3438 			goto outdrop;
3439 		}
3440 		error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
3441 		(void)vnode_put(vp);
3442 
3443 		goto outdrop;
3444 	}
3445 	case F_SETSIZE:
3446 		if (fp->f_type != DTYPE_VNODE) {
3447 			error = EBADF;
3448 			goto out;
3449 		}
3450 		vp = (struct vnode *)fp_get_data(fp);
3451 		proc_fdunlock(p);
3452 
3453 		error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
3454 		if (error) {
3455 			goto outdrop;
3456 		}
3457 		AUDIT_ARG(value64, offset);
3458 
3459 		error = vnode_getwithref(vp);
3460 		if (error) {
3461 			goto outdrop;
3462 		}
3463 
3464 #if CONFIG_MACF
3465 		error = mac_vnode_check_truncate(&context,
3466 		    fp->fp_glob->fg_cred, vp);
3467 		if (error) {
3468 			(void)vnode_put(vp);
3469 			goto outdrop;
3470 		}
3471 #endif
3472 		/*
3473 		 * Make sure that we are root.  Growing a file
3474 		 * without zero filling the data is a security hole.
3475 		 */
3476 		if (!kauth_cred_issuser(kauth_cred_get())) {
3477 			error = EACCES;
3478 		} else {
3479 			/*
3480 			 * Require privilege to change file size without zerofill,
3481 			 * else will change the file size and zerofill it.
3482 			 */
3483 			error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
3484 			if (error == 0) {
3485 				error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
3486 			} else {
3487 				error = vnode_setsize(vp, offset, 0, &context);
3488 			}
3489 
3490 #if CONFIG_MACF
3491 			if (error == 0) {
3492 				mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
3493 			}
3494 #endif
3495 		}
3496 
3497 		(void)vnode_put(vp);
3498 		goto outdrop;
3499 
3500 	case F_RDAHEAD:
3501 		if (fp->f_type != DTYPE_VNODE) {
3502 			error = EBADF;
3503 			goto out;
3504 		}
3505 		if (uap->arg) {
3506 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3507 		} else {
3508 			os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3509 		}
3510 		goto out;
3511 
3512 	case F_NOCACHE:
3513 		if (fp->f_type != DTYPE_VNODE) {
3514 			error = EBADF;
3515 			goto out;
3516 		}
3517 		if (uap->arg) {
3518 			os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3519 		} else {
3520 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3521 		}
3522 		goto out;
3523 
3524 	case F_NODIRECT:
3525 		if (fp->f_type != DTYPE_VNODE) {
3526 			error = EBADF;
3527 			goto out;
3528 		}
3529 		if (uap->arg) {
3530 			os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3531 		} else {
3532 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3533 		}
3534 		goto out;
3535 
3536 	case F_SINGLE_WRITER:
3537 		if (fp->f_type != DTYPE_VNODE) {
3538 			error = EBADF;
3539 			goto out;
3540 		}
3541 		if (uap->arg) {
3542 			os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3543 		} else {
3544 			os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3545 		}
3546 		goto out;
3547 
3548 	case F_GLOBAL_NOCACHE:
3549 		if (fp->f_type != DTYPE_VNODE) {
3550 			error = EBADF;
3551 			goto out;
3552 		}
3553 		vp = (struct vnode *)fp_get_data(fp);
3554 		proc_fdunlock(p);
3555 
3556 		if ((error = vnode_getwithref(vp)) == 0) {
3557 			*retval = vnode_isnocache(vp);
3558 
3559 			if (uap->arg) {
3560 				vnode_setnocache(vp);
3561 			} else {
3562 				vnode_clearnocache(vp);
3563 			}
3564 
3565 			(void)vnode_put(vp);
3566 		}
3567 		goto outdrop;
3568 
3569 	case F_CHECK_OPENEVT:
3570 		if (fp->f_type != DTYPE_VNODE) {
3571 			error = EBADF;
3572 			goto out;
3573 		}
3574 		vp = (struct vnode *)fp_get_data(fp);
3575 		proc_fdunlock(p);
3576 
3577 		if ((error = vnode_getwithref(vp)) == 0) {
3578 			*retval = vnode_is_openevt(vp);
3579 
3580 			if (uap->arg) {
3581 				vnode_set_openevt(vp);
3582 			} else {
3583 				vnode_clear_openevt(vp);
3584 			}
3585 
3586 			(void)vnode_put(vp);
3587 		}
3588 		goto outdrop;
3589 
3590 	case F_RDADVISE: {
3591 		struct radvisory ra_struct;
3592 
3593 		if (fp->f_type != DTYPE_VNODE) {
3594 			error = EBADF;
3595 			goto out;
3596 		}
3597 		vp = (struct vnode *)fp_get_data(fp);
3598 		proc_fdunlock(p);
3599 
3600 		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
3601 			goto outdrop;
3602 		}
3603 		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
3604 			error = EINVAL;
3605 			goto outdrop;
3606 		}
3607 		if ((error = vnode_getwithref(vp)) == 0) {
3608 			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
3609 
3610 			(void)vnode_put(vp);
3611 		}
3612 		goto outdrop;
3613 	}
3614 
3615 	case F_FLUSH_DATA:
3616 
3617 		if (fp->f_type != DTYPE_VNODE) {
3618 			error = EBADF;
3619 			goto out;
3620 		}
3621 		vp = (struct vnode *)fp_get_data(fp);
3622 		proc_fdunlock(p);
3623 
3624 		if ((error = vnode_getwithref(vp)) == 0) {
3625 			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
3626 
3627 			(void)vnode_put(vp);
3628 		}
3629 		goto outdrop;
3630 
3631 	case F_LOG2PHYS:
3632 	case F_LOG2PHYS_EXT: {
3633 		struct log2phys l2p_struct = {};    /* structure for allocate command */
3634 		int devBlockSize;
3635 
3636 		off_t file_offset = 0;
3637 		size_t a_size = 0;
3638 		size_t run = 0;
3639 
3640 		if (cmd == F_LOG2PHYS_EXT) {
3641 			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
3642 			if (error) {
3643 				goto out;
3644 			}
3645 			file_offset = l2p_struct.l2p_devoffset;
3646 		} else {
3647 			file_offset = fp->f_offset;
3648 		}
3649 		if (fp->f_type != DTYPE_VNODE) {
3650 			error = EBADF;
3651 			goto out;
3652 		}
3653 		vp = (struct vnode *)fp_get_data(fp);
3654 		proc_fdunlock(p);
3655 		if ((error = vnode_getwithref(vp))) {
3656 			goto outdrop;
3657 		}
3658 		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
3659 		if (error) {
3660 			(void)vnode_put(vp);
3661 			goto outdrop;
3662 		}
3663 		error = VNOP_BLKTOOFF(vp, lbn, &offset);
3664 		if (error) {
3665 			(void)vnode_put(vp);
3666 			goto outdrop;
3667 		}
3668 		devBlockSize = vfs_devblocksize(vnode_mount(vp));
3669 		if (cmd == F_LOG2PHYS_EXT) {
3670 			if (l2p_struct.l2p_contigbytes < 0) {
3671 				vnode_put(vp);
3672 				error = EINVAL;
3673 				goto outdrop;
3674 			}
3675 
3676 			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
3677 		} else {
3678 			a_size = devBlockSize;
3679 		}
3680 
3681 		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
3682 
3683 		(void)vnode_put(vp);
3684 
3685 		if (!error) {
3686 			l2p_struct.l2p_flags = 0;       /* for now */
3687 			if (cmd == F_LOG2PHYS_EXT) {
3688 				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
3689 			} else {
3690 				l2p_struct.l2p_contigbytes = 0; /* for now */
3691 			}
3692 
3693 			/*
3694 			 * The block number being -1 suggests that the file offset is not backed
3695 			 * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
3696 			 */
3697 			if (bn == -1) {
3698 				/* Don't multiply it by the block size */
3699 				l2p_struct.l2p_devoffset = bn;
3700 			} else {
3701 				l2p_struct.l2p_devoffset = bn * devBlockSize;
3702 				l2p_struct.l2p_devoffset += file_offset - offset;
3703 			}
3704 			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
3705 		}
3706 		goto outdrop;
3707 	}
3708 	case F_GETPATH:
3709 	case F_GETPATH_NOFIRMLINK: {
3710 		char *pathbufp;
3711 		size_t pathlen;
3712 
3713 		if (fp->f_type != DTYPE_VNODE) {
3714 			error = EBADF;
3715 			goto out;
3716 		}
3717 		vp = (struct vnode *)fp_get_data(fp);
3718 		proc_fdunlock(p);
3719 
3720 		pathlen = MAXPATHLEN;
3721 		pathbufp = zalloc(ZV_NAMEI);
3722 
3723 		if ((error = vnode_getwithref(vp)) == 0) {
3724 			error = vn_getpath_ext(vp, NULL, pathbufp,
3725 			    &pathlen, cmd == F_GETPATH_NOFIRMLINK ?
3726 			    VN_GETPATH_NO_FIRMLINK : 0);
3727 			(void)vnode_put(vp);
3728 
3729 			if (error == 0) {
3730 				error = copyout((caddr_t)pathbufp, argp, pathlen);
3731 			}
3732 		}
3733 		zfree(ZV_NAMEI, pathbufp);
3734 		goto outdrop;
3735 	}
3736 
3737 	case F_PATHPKG_CHECK: {
3738 		char *pathbufp;
3739 		size_t pathlen;
3740 
3741 		if (fp->f_type != DTYPE_VNODE) {
3742 			error = EBADF;
3743 			goto out;
3744 		}
3745 		vp = (struct vnode *)fp_get_data(fp);
3746 		proc_fdunlock(p);
3747 
3748 		pathlen = MAXPATHLEN;
3749 		pathbufp = zalloc(ZV_NAMEI);
3750 
3751 		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
3752 			if ((error = vnode_getwithref(vp)) == 0) {
3753 				AUDIT_ARG(text, pathbufp);
3754 				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
3755 
3756 				(void)vnode_put(vp);
3757 			}
3758 		}
3759 		zfree(ZV_NAMEI, pathbufp);
3760 		goto outdrop;
3761 	}
3762 
3763 	case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
3764 	case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
3765 	case F_BARRIERFSYNC:  // fsync + barrier
3766 	case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
3767 	case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
3768 		if (fp->f_type != DTYPE_VNODE) {
3769 			error = EBADF;
3770 			goto out;
3771 		}
3772 		vp = (struct vnode *)fp_get_data(fp);
3773 		proc_fdunlock(p);
3774 
3775 		if ((error = vnode_getwithref(vp)) == 0) {
3776 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
3777 
3778 			(void)vnode_put(vp);
3779 		}
3780 		break;
3781 	}
3782 
3783 	/*
3784 	 * SPI (private) for opening a file starting from a dir fd
3785 	 */
3786 	case F_OPENFROM: {
3787 		/* Check if this isn't a valid file descriptor */
3788 		if (fp->f_type != DTYPE_VNODE) {
3789 			error = EBADF;
3790 			goto out;
3791 		}
3792 		vp = (struct vnode *)fp_get_data(fp);
3793 
3794 		return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
3795 	}
3796 
3797 	/*
3798 	 * SPI (private) for unlinking a file starting from a dir fd
3799 	 */
3800 	case F_UNLINKFROM: {
3801 		user_addr_t pathname;
3802 
3803 		/* Check if this isn't a valid file descriptor */
3804 		if ((fp->f_type != DTYPE_VNODE) ||
3805 		    (fp->f_flag & FREAD) == 0) {
3806 			error = EBADF;
3807 			goto out;
3808 		}
3809 		vp = (struct vnode *)fp_get_data(fp);
3810 		proc_fdunlock(p);
3811 
3812 		if (vnode_getwithref(vp)) {
3813 			error = ENOENT;
3814 			goto outdrop;
3815 		}
3816 
3817 		/* Only valid for directories */
3818 		if (vp->v_type != VDIR) {
3819 			vnode_put(vp);
3820 			error = ENOTDIR;
3821 			goto outdrop;
3822 		}
3823 
3824 		/*
3825 		 * Only entitled apps may use the credentials of the thread
3826 		 * that opened the file descriptor.
3827 		 * Non-entitled threads will use their own context.
3828 		 */
3829 		if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
3830 			has_entitlement = 1;
3831 		}
3832 
3833 		/* Get flags, mode and pathname arguments. */
3834 		if (IS_64BIT_PROCESS(p)) {
3835 			pathname = (user_addr_t)argp;
3836 		} else {
3837 			pathname = CAST_USER_ADDR_T(argp);
3838 		}
3839 
3840 		/* Start the lookup relative to the file descriptor's vnode. */
3841 		error = unlink1(has_entitlement ? &context : vfs_context_current(),
3842 		    vp, pathname, UIO_USERSPACE, 0);
3843 
3844 		vnode_put(vp);
3845 		break;
3846 	}
3847 
3848 	case F_ADDSIGS:
3849 	case F_ADDFILESIGS:
3850 	case F_ADDFILESIGS_FOR_DYLD_SIM:
3851 	case F_ADDFILESIGS_RETURN:
3852 	case F_ADDFILESIGS_INFO:
3853 	{
3854 		struct cs_blob *blob = NULL;
3855 		struct user_fsignatures fs;
3856 		kern_return_t kr;
3857 		vm_offset_t kernel_blob_addr;
3858 		vm_size_t kernel_blob_size;
3859 		int blob_add_flags = 0;
3860 		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
3861 		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
3862 		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
3863 
3864 		if (fp->f_type != DTYPE_VNODE) {
3865 			error = EBADF;
3866 			goto out;
3867 		}
3868 		vp = (struct vnode *)fp_get_data(fp);
3869 		proc_fdunlock(p);
3870 
3871 		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3872 			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
3873 			if ((proc_getcsflags(p) & CS_KILL) == 0) {
3874 				proc_lock(p);
3875 				proc_csflags_set(p, CS_KILL);
3876 				proc_unlock(p);
3877 			}
3878 		}
3879 
3880 		error = vnode_getwithref(vp);
3881 		if (error) {
3882 			goto outdrop;
3883 		}
3884 
3885 		if (IS_64BIT_PROCESS(p)) {
3886 			error = copyin(argp, &fs, sizeof_fs);
3887 		} else {
3888 			if (cmd == F_ADDFILESIGS_INFO) {
3889 				error = EINVAL;
3890 				vnode_put(vp);
3891 				goto outdrop;
3892 			}
3893 
3894 			struct user32_fsignatures fs32;
3895 
3896 			error = copyin(argp, &fs32, sizeof(fs32));
3897 			fs.fs_file_start = fs32.fs_file_start;
3898 			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
3899 			fs.fs_blob_size = fs32.fs_blob_size;
3900 		}
3901 
3902 		if (error) {
3903 			vnode_put(vp);
3904 			goto outdrop;
3905 		}
3906 
3907 		/*
3908 		 * First check if we have something loaded at this offset
3909 		 */
3910 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
3911 		if (blob != NULL) {
3912 			/* If this is for dyld_sim revalidate the blob */
3913 			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3914 				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
3915 				if (error) {
3916 					blob = NULL;
3917 					if (error != EAGAIN) {
3918 						vnode_put(vp);
3919 						goto outdrop;
3920 					}
3921 				}
3922 			}
3923 		}
3924 
3925 		if (blob == NULL) {
3926 			/*
3927 			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
3928 			 * our use cases for the immediate future, but note that at the time of this commit, some
3929 			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
3930 			 *
3931 			 * We should consider how we can manage this more effectively; the above means that some
3932 			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
3933 			 * threshold considered ridiculous at the time of this change.
3934 			 */
3935 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
3936 			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
3937 				error = E2BIG;
3938 				vnode_put(vp);
3939 				goto outdrop;
3940 			}
3941 
3942 			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
3943 			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
3944 			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
3945 				error = ENOMEM;
3946 				vnode_put(vp);
3947 				goto outdrop;
3948 			}
3949 
3950 			if (cmd == F_ADDSIGS) {
3951 				error = copyin(fs.fs_blob_start,
3952 				    (void *) kernel_blob_addr,
3953 				    fs.fs_blob_size);
3954 			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
3955 				int resid;
3956 
3957 				error = vn_rdwr(UIO_READ,
3958 				    vp,
3959 				    (caddr_t) kernel_blob_addr,
3960 				    (int)kernel_blob_size,
3961 				    fs.fs_file_start + fs.fs_blob_start,
3962 				    UIO_SYSSPACE,
3963 				    0,
3964 				    kauth_cred_get(),
3965 				    &resid,
3966 				    p);
3967 				if ((error == 0) && resid) {
3968 					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
3969 					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
3970 				}
3971 			}
3972 
3973 			if (error) {
3974 				ubc_cs_blob_deallocate(kernel_blob_addr,
3975 				    kernel_blob_size);
3976 				vnode_put(vp);
3977 				goto outdrop;
3978 			}
3979 
3980 			blob = NULL;
3981 			error = ubc_cs_blob_add(vp,
3982 			    proc_platform(p),
3983 			    CPU_TYPE_ANY,                       /* not for a specific architecture */
3984 			    CPU_SUBTYPE_ANY,
3985 			    fs.fs_file_start,
3986 			    &kernel_blob_addr,
3987 			    kernel_blob_size,
3988 			    NULL,
3989 			    blob_add_flags,
3990 			    &blob);
3991 
3992 			/* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
3993 			if (error) {
3994 				if (kernel_blob_addr) {
3995 					ubc_cs_blob_deallocate(kernel_blob_addr,
3996 					    kernel_blob_size);
3997 				}
3998 				vnode_put(vp);
3999 				goto outdrop;
4000 			} else {
4001 #if CHECK_CS_VALIDATION_BITMAP
4002 				ubc_cs_validation_bitmap_allocate( vp );
4003 #endif
4004 			}
4005 		}
4006 
4007 		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
4008 		    cmd == F_ADDFILESIGS_INFO) {
4009 			/*
4010 			 * The first element of the structure is an
4011 			 * off_t that happens to have the same size for
4012 			 * all archs. Let's overwrite that.
4013 			 */
4014 			off_t end_offset = 0;
4015 			if (blob) {
4016 				end_offset = blob->csb_end_offset;
4017 			}
4018 			error = copyout(&end_offset, argp, sizeof(end_offset));
4019 
4020 			if (error) {
4021 				vnode_put(vp);
4022 				goto outdrop;
4023 			}
4024 		}
4025 
4026 		if (cmd == F_ADDFILESIGS_INFO) {
4027 			/* Return information. What we copy out depends on the size of the
4028 			 * passed in structure, to keep binary compatibility. */
4029 
4030 			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
4031 				// enough room for fs_cdhash[20]+fs_hash_type
4032 
4033 				if (blob != NULL) {
4034 					error = copyout(blob->csb_cdhash,
4035 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
4036 					    USER_FSIGNATURES_CDHASH_LEN);
4037 					if (error) {
4038 						vnode_put(vp);
4039 						goto outdrop;
4040 					}
4041 					int hashtype = cs_hash_type(blob->csb_hashtype);
4042 					error = copyout(&hashtype,
4043 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
4044 					    sizeof(int));
4045 					if (error) {
4046 						vnode_put(vp);
4047 						goto outdrop;
4048 					}
4049 				}
4050 			}
4051 		}
4052 
4053 		(void) vnode_put(vp);
4054 		break;
4055 	}
4056 #if CONFIG_SUPPLEMENTAL_SIGNATURES
4057 	case F_ADDFILESUPPL:
4058 	{
4059 		struct vnode *ivp;
4060 		struct cs_blob *blob = NULL;
4061 		struct user_fsupplement fs;
4062 		int orig_fd;
4063 		struct fileproc* orig_fp = NULL;
4064 		kern_return_t kr;
4065 		vm_offset_t kernel_blob_addr;
4066 		vm_size_t kernel_blob_size;
4067 
4068 		if (!IS_64BIT_PROCESS(p)) {
4069 			error = EINVAL;
4070 			goto out; // drop fp and unlock fds
4071 		}
4072 
4073 		if (fp->f_type != DTYPE_VNODE) {
4074 			error = EBADF;
4075 			goto out;
4076 		}
4077 
4078 		error = copyin(argp, &fs, sizeof(fs));
4079 		if (error) {
4080 			goto out;
4081 		}
4082 
4083 		orig_fd = fs.fs_orig_fd;
4084 		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
4085 			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
4086 			goto out;
4087 		}
4088 
4089 		if (orig_fp->f_type != DTYPE_VNODE) {
4090 			error = EBADF;
4091 			fp_drop(p, orig_fd, orig_fp, 1);
4092 			goto out;
4093 		}
4094 
4095 		ivp = (struct vnode *)fp_get_data(orig_fp);
4096 
4097 		vp = (struct vnode *)fp_get_data(fp);
4098 
4099 		proc_fdunlock(p);
4100 
4101 		error = vnode_getwithref(ivp);
4102 		if (error) {
4103 			fp_drop(p, orig_fd, orig_fp, 0);
4104 			goto outdrop; //drop fp
4105 		}
4106 
4107 		error = vnode_getwithref(vp);
4108 		if (error) {
4109 			vnode_put(ivp);
4110 			fp_drop(p, orig_fd, orig_fp, 0);
4111 			goto outdrop;
4112 		}
4113 
4114 		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
4115 			error = E2BIG;
4116 			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
4117 		}
4118 
4119 		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
4120 		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
4121 		if (kr != KERN_SUCCESS) {
4122 			error = ENOMEM;
4123 			goto dropboth;
4124 		}
4125 
4126 		int resid;
4127 		error = vn_rdwr(UIO_READ, vp,
4128 		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
4129 		    fs.fs_file_start + fs.fs_blob_start,
4130 		    UIO_SYSSPACE, 0,
4131 		    kauth_cred_get(), &resid, p);
4132 		if ((error == 0) && resid) {
4133 			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
4134 			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
4135 		}
4136 
4137 		if (error) {
4138 			ubc_cs_blob_deallocate(kernel_blob_addr,
4139 			    kernel_blob_size);
4140 			goto dropboth;
4141 		}
4142 
4143 		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
4144 		    &kernel_blob_addr, kernel_blob_size, &blob);
4145 
4146 		/* ubc_cs_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
4147 		if (error) {
4148 			if (kernel_blob_addr) {
4149 				ubc_cs_blob_deallocate(kernel_blob_addr,
4150 				    kernel_blob_size);
4151 			}
4152 			goto dropboth;
4153 		}
4154 		vnode_put(ivp);
4155 		vnode_put(vp);
4156 		fp_drop(p, orig_fd, orig_fp, 0);
4157 		break;
4158 
4159 dropboth:
4160 		vnode_put(ivp);
4161 		vnode_put(vp);
4162 		fp_drop(p, orig_fd, orig_fp, 0);
4163 		goto outdrop;
4164 	}
4165 #endif
4166 	case F_GETCODEDIR:
4167 	case F_FINDSIGS: {
4168 		error = ENOTSUP;
4169 		goto out;
4170 	}
4171 	case F_CHECK_LV: {
4172 		struct fileglob *fg;
4173 		fchecklv_t lv = {};
4174 
4175 		if (fp->f_type != DTYPE_VNODE) {
4176 			error = EBADF;
4177 			goto out;
4178 		}
4179 		fg = fp->fp_glob;
4180 		proc_fdunlock(p);
4181 
4182 		if (IS_64BIT_PROCESS(p)) {
4183 			error = copyin(argp, &lv, sizeof(lv));
4184 		} else {
4185 			struct user32_fchecklv lv32 = {};
4186 
4187 			error = copyin(argp, &lv32, sizeof(lv32));
4188 			lv.lv_file_start = lv32.lv_file_start;
4189 			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
4190 			lv.lv_error_message_size = lv32.lv_error_message_size;
4191 		}
4192 		if (error) {
4193 			goto outdrop;
4194 		}
4195 
4196 #if CONFIG_MACF
4197 		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
4198 		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
4199 #endif
4200 
4201 		break;
4202 	}
4203 	case F_GETSIGSINFO: {
4204 		struct cs_blob *blob = NULL;
4205 		fgetsigsinfo_t sigsinfo = {};
4206 
4207 		if (fp->f_type != DTYPE_VNODE) {
4208 			error = EBADF;
4209 			goto out;
4210 		}
4211 		vp = (struct vnode *)fp_get_data(fp);
4212 		proc_fdunlock(p);
4213 
4214 		error = vnode_getwithref(vp);
4215 		if (error) {
4216 			goto outdrop;
4217 		}
4218 
4219 		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
4220 		if (error) {
4221 			vnode_put(vp);
4222 			goto outdrop;
4223 		}
4224 
4225 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
4226 		if (blob == NULL) {
4227 			error = ENOENT;
4228 			vnode_put(vp);
4229 			goto outdrop;
4230 		}
4231 		switch (sigsinfo.fg_info_request) {
4232 		case GETSIGSINFO_PLATFORM_BINARY:
4233 			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
4234 			error = copyout(&sigsinfo.fg_sig_is_platform,
4235 			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
4236 			    sizeof(sigsinfo.fg_sig_is_platform));
4237 			if (error) {
4238 				vnode_put(vp);
4239 				goto outdrop;
4240 			}
4241 			break;
4242 		default:
4243 			error = EINVAL;
4244 			vnode_put(vp);
4245 			goto outdrop;
4246 		}
4247 		vnode_put(vp);
4248 		break;
4249 	}
4250 #if CONFIG_PROTECT
4251 	case F_GETPROTECTIONCLASS: {
4252 		if (fp->f_type != DTYPE_VNODE) {
4253 			error = EBADF;
4254 			goto out;
4255 		}
4256 		vp = (struct vnode *)fp_get_data(fp);
4257 
4258 		proc_fdunlock(p);
4259 
4260 		if (vnode_getwithref(vp)) {
4261 			error = ENOENT;
4262 			goto outdrop;
4263 		}
4264 
4265 		struct vnode_attr va;
4266 
4267 		VATTR_INIT(&va);
4268 		VATTR_WANTED(&va, va_dataprotect_class);
4269 		error = VNOP_GETATTR(vp, &va, &context);
4270 		if (!error) {
4271 			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
4272 				*retval = va.va_dataprotect_class;
4273 			} else {
4274 				error = ENOTSUP;
4275 			}
4276 		}
4277 
4278 		vnode_put(vp);
4279 		break;
4280 	}
4281 
4282 	case F_SETPROTECTIONCLASS: {
4283 		/* tmp must be a valid PROTECTION_CLASS_* */
4284 		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4285 
4286 		if (fp->f_type != DTYPE_VNODE) {
4287 			error = EBADF;
4288 			goto out;
4289 		}
4290 		vp = (struct vnode *)fp_get_data(fp);
4291 
4292 		proc_fdunlock(p);
4293 
4294 		if (vnode_getwithref(vp)) {
4295 			error = ENOENT;
4296 			goto outdrop;
4297 		}
4298 
4299 		/* Only go forward if you have write access */
4300 		vfs_context_t ctx = vfs_context_current();
4301 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4302 			vnode_put(vp);
4303 			error = EBADF;
4304 			goto outdrop;
4305 		}
4306 
4307 		struct vnode_attr va;
4308 
4309 		VATTR_INIT(&va);
4310 		VATTR_SET(&va, va_dataprotect_class, tmp);
4311 
4312 		error = VNOP_SETATTR(vp, &va, ctx);
4313 
4314 		vnode_put(vp);
4315 		break;
4316 	}
4317 
4318 	case F_TRANSCODEKEY: {
4319 		if (fp->f_type != DTYPE_VNODE) {
4320 			error = EBADF;
4321 			goto out;
4322 		}
4323 
4324 		vp = (struct vnode *)fp_get_data(fp);
4325 		proc_fdunlock(p);
4326 
4327 		if (vnode_getwithref(vp)) {
4328 			error = ENOENT;
4329 			goto outdrop;
4330 		}
4331 
4332 		cp_key_t k = {
4333 			.len = CP_MAX_WRAPPEDKEYSIZE,
4334 		};
4335 
4336 		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
4337 		if (k.key == NULL) {
4338 			error = ENOMEM;
4339 		} else {
4340 			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
4341 		}
4342 
4343 		vnode_put(vp);
4344 
4345 		if (error == 0) {
4346 			error = copyout(k.key, argp, k.len);
4347 			*retval = k.len;
4348 		}
4349 		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);
4350 
4351 		break;
4352 	}
4353 
4354 	case F_GETPROTECTIONLEVEL:  {
4355 		if (fp->f_type != DTYPE_VNODE) {
4356 			error = EBADF;
4357 			goto out;
4358 		}
4359 
4360 		vp = (struct vnode*)fp_get_data(fp);
4361 		proc_fdunlock(p);
4362 
4363 		if (vnode_getwithref(vp)) {
4364 			error = ENOENT;
4365 			goto outdrop;
4366 		}
4367 
4368 		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
4369 
4370 		vnode_put(vp);
4371 		break;
4372 	}
4373 
4374 	case F_GETDEFAULTPROTLEVEL:  {
4375 		if (fp->f_type != DTYPE_VNODE) {
4376 			error = EBADF;
4377 			goto out;
4378 		}
4379 
4380 		vp = (struct vnode*)fp_get_data(fp);
4381 		proc_fdunlock(p);
4382 
4383 		if (vnode_getwithref(vp)) {
4384 			error = ENOENT;
4385 			goto outdrop;
4386 		}
4387 
4388 		/*
4389 		 * if cp_get_major_vers fails, error will be set to proper errno
4390 		 * and cp_version will still be 0.
4391 		 */
4392 
4393 		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
4394 
4395 		vnode_put(vp);
4396 		break;
4397 	}
4398 
4399 #endif /* CONFIG_PROTECT */
4400 
4401 	case F_MOVEDATAEXTENTS: {
4402 		struct fileproc *fp2 = NULL;
4403 		struct vnode *src_vp = NULLVP;
4404 		struct vnode *dst_vp = NULLVP;
4405 		/* We need to grab the 2nd FD out of the arguments before moving on. */
4406 		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
4407 
4408 		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
4409 		if (error) {
4410 			goto out;
4411 		}
4412 
4413 		if (fp->f_type != DTYPE_VNODE) {
4414 			error = EBADF;
4415 			goto out;
4416 		}
4417 
4418 		/*
4419 		 * For now, special case HFS+ and APFS only, since this
4420 		 * is SPI.
4421 		 */
4422 		src_vp = (struct vnode *)fp_get_data(fp);
4423 		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
4424 			error = ENOTSUP;
4425 			goto out;
4426 		}
4427 
4428 		/*
4429 		 * Get the references before we start acquiring iocounts on the vnodes,
4430 		 * while we still hold the proc fd lock
4431 		 */
4432 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4433 			error = EBADF;
4434 			goto out;
4435 		}
4436 		if (fp2->f_type != DTYPE_VNODE) {
4437 			fp_drop(p, fd2, fp2, 1);
4438 			error = EBADF;
4439 			goto out;
4440 		}
4441 		dst_vp = (struct vnode *)fp_get_data(fp2);
4442 		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
4443 			fp_drop(p, fd2, fp2, 1);
4444 			error = ENOTSUP;
4445 			goto out;
4446 		}
4447 
4448 #if CONFIG_MACF
4449 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4450 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4451 		if (error) {
4452 			fp_drop(p, fd2, fp2, 1);
4453 			goto out;
4454 		}
4455 #endif
4456 		/* Audit the 2nd FD */
4457 		AUDIT_ARG(fd, fd2);
4458 
4459 		proc_fdunlock(p);
4460 
4461 		if (vnode_getwithref(src_vp)) {
4462 			fp_drop(p, fd2, fp2, 0);
4463 			error = ENOENT;
4464 			goto outdrop;
4465 		}
4466 		if (vnode_getwithref(dst_vp)) {
4467 			vnode_put(src_vp);
4468 			fp_drop(p, fd2, fp2, 0);
4469 			error = ENOENT;
4470 			goto outdrop;
4471 		}
4472 
4473 		/*
4474 		 * Basic asserts; validate they are not the same and that
4475 		 * both live on the same filesystem.
4476 		 */
4477 		if (dst_vp == src_vp) {
4478 			vnode_put(src_vp);
4479 			vnode_put(dst_vp);
4480 			fp_drop(p, fd2, fp2, 0);
4481 			error = EINVAL;
4482 			goto outdrop;
4483 		}
4484 
4485 		if (dst_vp->v_mount != src_vp->v_mount) {
4486 			vnode_put(src_vp);
4487 			vnode_put(dst_vp);
4488 			fp_drop(p, fd2, fp2, 0);
4489 			error = EXDEV;
4490 			goto outdrop;
4491 		}
4492 
4493 		/* Now we have a legit pair of FDs.  Go to work */
4494 
4495 		/* Now check for write access to the target files */
4496 		if (vnode_authorize(src_vp, NULLVP,
4497 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4498 			vnode_put(src_vp);
4499 			vnode_put(dst_vp);
4500 			fp_drop(p, fd2, fp2, 0);
4501 			error = EBADF;
4502 			goto outdrop;
4503 		}
4504 
4505 		if (vnode_authorize(dst_vp, NULLVP,
4506 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4507 			vnode_put(src_vp);
4508 			vnode_put(dst_vp);
4509 			fp_drop(p, fd2, fp2, 0);
4510 			error = EBADF;
4511 			goto outdrop;
4512 		}
4513 
4514 		/* Verify that both vps point to files and not directories */
4515 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4516 			error = EINVAL;
4517 			vnode_put(src_vp);
4518 			vnode_put(dst_vp);
4519 			fp_drop(p, fd2, fp2, 0);
4520 			goto outdrop;
4521 		}
4522 
4523 		/*
4524 		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
4525 		 * We'll pass in our special bit indicating that the new behavior is expected
4526 		 */
4527 
4528 		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
4529 
4530 		vnode_put(src_vp);
4531 		vnode_put(dst_vp);
4532 		fp_drop(p, fd2, fp2, 0);
4533 		break;
4534 	}
4535 
4536 	case F_TRANSFEREXTENTS: {
4537 		struct fileproc *fp2 = NULL;
4538 		struct vnode *src_vp = NULLVP;
4539 		struct vnode *dst_vp = NULLVP;
4540 
4541 		/* Get 2nd FD out of the arguments. */
4542 		int fd2 = CAST_DOWN_EXPLICIT(int, uap->arg);
4543 		if (fd2 < 0) {
4544 			error = EINVAL;
4545 			goto out;
4546 		}
4547 
4548 		if (fp->f_type != DTYPE_VNODE) {
4549 			error = EBADF;
4550 			goto out;
4551 		}
4552 
4553 		/*
4554 		 * Only allow this for APFS
4555 		 */
4556 		src_vp = (struct vnode *)fp_get_data(fp);
4557 		if (src_vp->v_tag != VT_APFS) {
4558 			error = ENOTSUP;
4559 			goto out;
4560 		}
4561 
4562 		/*
4563 		 * Get the references before we start acquiring iocounts on the vnodes,
4564 		 * while we still hold the proc fd lock
4565 		 */
4566 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4567 			error = EBADF;
4568 			goto out;
4569 		}
4570 		if (fp2->f_type != DTYPE_VNODE) {
4571 			fp_drop(p, fd2, fp2, 1);
4572 			error = EBADF;
4573 			goto out;
4574 		}
4575 		dst_vp = (struct vnode *)fp_get_data(fp2);
4576 		if (dst_vp->v_tag != VT_APFS) {
4577 			fp_drop(p, fd2, fp2, 1);
4578 			error = ENOTSUP;
4579 			goto out;
4580 		}
4581 
4582 #if CONFIG_MACF
4583 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4584 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4585 		if (error) {
4586 			fp_drop(p, fd2, fp2, 1);
4587 			goto out;
4588 		}
4589 #endif
4590 		/* Audit the 2nd FD */
4591 		AUDIT_ARG(fd, fd2);
4592 
4593 		proc_fdunlock(p);
4594 
4595 		if (vnode_getwithref(src_vp)) {
4596 			fp_drop(p, fd2, fp2, 0);
4597 			error = ENOENT;
4598 			goto outdrop;
4599 		}
4600 		if (vnode_getwithref(dst_vp)) {
4601 			vnode_put(src_vp);
4602 			fp_drop(p, fd2, fp2, 0);
4603 			error = ENOENT;
4604 			goto outdrop;
4605 		}
4606 
4607 		/*
4608 		 * Validate they are not the same and that
4609 		 * both live on the same filesystem.
4610 		 */
4611 		if (dst_vp == src_vp) {
4612 			vnode_put(src_vp);
4613 			vnode_put(dst_vp);
4614 			fp_drop(p, fd2, fp2, 0);
4615 			error = EINVAL;
4616 			goto outdrop;
4617 		}
4618 		if (dst_vp->v_mount != src_vp->v_mount) {
4619 			vnode_put(src_vp);
4620 			vnode_put(dst_vp);
4621 			fp_drop(p, fd2, fp2, 0);
4622 			error = EXDEV;
4623 			goto outdrop;
4624 		}
4625 
4626 		/* Verify that both vps point to files and not directories */
4627 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4628 			error = EINVAL;
4629 			vnode_put(src_vp);
4630 			vnode_put(dst_vp);
4631 			fp_drop(p, fd2, fp2, 0);
4632 			goto outdrop;
4633 		}
4634 
4635 
4636 		/*
4637 		 * Okay, vps are legit. Check  access.  We'll require write access
4638 		 * to both files.
4639 		 */
4640 		if (vnode_authorize(src_vp, NULLVP,
4641 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4642 			vnode_put(src_vp);
4643 			vnode_put(dst_vp);
4644 			fp_drop(p, fd2, fp2, 0);
4645 			error = EBADF;
4646 			goto outdrop;
4647 		}
4648 		if (vnode_authorize(dst_vp, NULLVP,
4649 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4650 			vnode_put(src_vp);
4651 			vnode_put(dst_vp);
4652 			fp_drop(p, fd2, fp2, 0);
4653 			error = EBADF;
4654 			goto outdrop;
4655 		}
4656 
4657 		/* Pass it on through to the fs */
4658 		error = VNOP_IOCTL(src_vp, cmd, (caddr_t)dst_vp, 0, &context);
4659 
4660 		vnode_put(src_vp);
4661 		vnode_put(dst_vp);
4662 		fp_drop(p, fd2, fp2, 0);
4663 		break;
4664 	}
4665 
4666 	/*
4667 	 * SPI for making a file compressed.
4668 	 */
4669 	case F_MAKECOMPRESSED: {
4670 		uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4671 
4672 		if (fp->f_type != DTYPE_VNODE) {
4673 			error = EBADF;
4674 			goto out;
4675 		}
4676 
4677 		vp = (struct vnode*)fp_get_data(fp);
4678 		proc_fdunlock(p);
4679 
4680 		/* get the vnode */
4681 		if (vnode_getwithref(vp)) {
4682 			error = ENOENT;
4683 			goto outdrop;
4684 		}
4685 
4686 		/* Is it a file? */
4687 		if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
4688 			vnode_put(vp);
4689 			error = EBADF;
4690 			goto outdrop;
4691 		}
4692 
4693 		/* invoke ioctl to pass off to FS */
4694 		/* Only go forward if you have write access */
4695 		vfs_context_t ctx = vfs_context_current();
4696 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4697 			vnode_put(vp);
4698 			error = EBADF;
4699 			goto outdrop;
4700 		}
4701 
4702 		error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
4703 
4704 		vnode_put(vp);
4705 		break;
4706 	}
4707 
4708 	/*
4709 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4710 	 * the open FD will be written to the Fastflow.
4711 	 */
4712 	case F_SET_GREEDY_MODE:
4713 	/* intentionally drop through to the same handler as F_SETSTATIC.
4714 	 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4715 	 */
4716 
4717 	/*
4718 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4719 	 * the open FD will represent static content.
4720 	 */
4721 	case F_SETSTATICCONTENT: {
4722 		caddr_t ioctl_arg = NULL;
4723 
4724 		if (uap->arg) {
4725 			ioctl_arg = (caddr_t) 1;
4726 		}
4727 
4728 		if (fp->f_type != DTYPE_VNODE) {
4729 			error = EBADF;
4730 			goto out;
4731 		}
4732 		vp = (struct vnode *)fp_get_data(fp);
4733 		proc_fdunlock(p);
4734 
4735 		error = vnode_getwithref(vp);
4736 		if (error) {
4737 			error = ENOENT;
4738 			goto outdrop;
4739 		}
4740 
4741 		/* Only go forward if you have write access */
4742 		vfs_context_t ctx = vfs_context_current();
4743 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4744 			vnode_put(vp);
4745 			error = EBADF;
4746 			goto outdrop;
4747 		}
4748 
4749 		error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
4750 		(void)vnode_put(vp);
4751 
4752 		break;
4753 	}
4754 
4755 	/*
4756 	 * SPI (private) for indicating to the lower level storage driver that the
4757 	 * subsequent writes should be of a particular IO type (burst, greedy, static),
4758 	 * or other flavors that may be necessary.
4759 	 */
4760 	case F_SETIOTYPE: {
4761 		caddr_t param_ptr;
4762 		uint32_t param;
4763 
4764 		if (uap->arg) {
4765 			/* extract 32 bits of flags from userland */
4766 			param_ptr = (caddr_t) uap->arg;
4767 			param = (uint32_t) param_ptr;
4768 		} else {
4769 			/* If no argument is specified, error out */
4770 			error = EINVAL;
4771 			goto out;
4772 		}
4773 
4774 		/*
4775 		 * Validate the different types of flags that can be specified:
4776 		 * all of them are mutually exclusive for now.
4777 		 */
4778 		switch (param) {
4779 		case F_IOTYPE_ISOCHRONOUS:
4780 			break;
4781 
4782 		default:
4783 			error = EINVAL;
4784 			goto out;
4785 		}
4786 
4787 
4788 		if (fp->f_type != DTYPE_VNODE) {
4789 			error = EBADF;
4790 			goto out;
4791 		}
4792 		vp = (struct vnode *)fp_get_data(fp);
4793 		proc_fdunlock(p);
4794 
4795 		error = vnode_getwithref(vp);
4796 		if (error) {
4797 			error = ENOENT;
4798 			goto outdrop;
4799 		}
4800 
4801 		/* Only go forward if you have write access */
4802 		vfs_context_t ctx = vfs_context_current();
4803 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4804 			vnode_put(vp);
4805 			error = EBADF;
4806 			goto outdrop;
4807 		}
4808 
4809 		error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
4810 		(void)vnode_put(vp);
4811 
4812 		break;
4813 	}
4814 
4815 	/*
4816 	 * Set the vnode pointed to by 'fd'
4817 	 * and tag it as the (potentially future) backing store
4818 	 * for another filesystem
4819 	 */
4820 	case F_SETBACKINGSTORE: {
4821 		if (fp->f_type != DTYPE_VNODE) {
4822 			error = EBADF;
4823 			goto out;
4824 		}
4825 
4826 		vp = (struct vnode *)fp_get_data(fp);
4827 
4828 		if (vp->v_tag != VT_HFS) {
4829 			error = EINVAL;
4830 			goto out;
4831 		}
4832 		proc_fdunlock(p);
4833 
4834 		if (vnode_getwithref(vp)) {
4835 			error = ENOENT;
4836 			goto outdrop;
4837 		}
4838 
4839 		/* only proceed if you have write access */
4840 		vfs_context_t ctx = vfs_context_current();
4841 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4842 			vnode_put(vp);
4843 			error = EBADF;
4844 			goto outdrop;
4845 		}
4846 
4847 
4848 		/* If arg != 0, set, otherwise unset */
4849 		if (uap->arg) {
4850 			error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
4851 		} else {
4852 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
4853 		}
4854 
4855 		vnode_put(vp);
4856 		break;
4857 	}
4858 
4859 	/*
4860 	 * like F_GETPATH, but special semantics for
4861 	 * the mobile time machine handler.
4862 	 */
4863 	case F_GETPATH_MTMINFO: {
4864 		char *pathbufp;
4865 		int pathlen;
4866 
4867 		if (fp->f_type != DTYPE_VNODE) {
4868 			error = EBADF;
4869 			goto out;
4870 		}
4871 		vp = (struct vnode *)fp_get_data(fp);
4872 		proc_fdunlock(p);
4873 
4874 		pathlen = MAXPATHLEN;
4875 		pathbufp = zalloc(ZV_NAMEI);
4876 
4877 		if ((error = vnode_getwithref(vp)) == 0) {
4878 			int backingstore = 0;
4879 
4880 			/* Check for error from vn_getpath before moving on */
4881 			if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4882 				if (vp->v_tag == VT_HFS) {
4883 					error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4884 				}
4885 				(void)vnode_put(vp);
4886 
4887 				if (error == 0) {
4888 					error = copyout((caddr_t)pathbufp, argp, pathlen);
4889 				}
4890 				if (error == 0) {
4891 					/*
4892 					 * If the copyout was successful, now check to ensure
4893 					 * that this vnode is not a BACKINGSTORE vnode.  mtmd
4894 					 * wants the path regardless.
4895 					 */
4896 					if (backingstore) {
4897 						error = EBUSY;
4898 					}
4899 				}
4900 			} else {
4901 				(void)vnode_put(vp);
4902 			}
4903 		}
4904 
4905 		zfree(ZV_NAMEI, pathbufp);
4906 		goto outdrop;
4907 	}
4908 
4909 	case F_RECYCLE: {
4910 #if !DEBUG && !DEVELOPMENT
4911 		bool allowed = false;
4912 
4913 		//
4914 		// non-debug and non-development kernels have restrictions
4915 		// on who can call this fcntl.  the process has to be marked
4916 		// with the dataless-manipulator entitlement and either the
4917 		// process or thread have to be marked rapid-aging.
4918 		//
4919 		if (!vfs_context_is_dataless_manipulator(&context)) {
4920 			error = EPERM;
4921 			goto out;
4922 		}
4923 
4924 		proc_t proc = vfs_context_proc(&context);
4925 		if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4926 			allowed = true;
4927 		} else {
4928 			thread_t thr = vfs_context_thread(&context);
4929 			if (thr) {
4930 				struct uthread *ut = get_bsdthread_info(thr);
4931 
4932 				if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4933 					allowed = true;
4934 				}
4935 			}
4936 		}
4937 		if (!allowed) {
4938 			error = EPERM;
4939 			goto out;
4940 		}
4941 #endif
4942 
4943 		if (fp->f_type != DTYPE_VNODE) {
4944 			error = EBADF;
4945 			goto out;
4946 		}
4947 		vp = (struct vnode *)fp_get_data(fp);
4948 		proc_fdunlock(p);
4949 
4950 		vnode_recycle(vp);
4951 		break;
4952 	}
4953 
4954 #if CONFIG_FILE_LEASES
4955 	case F_SETLEASE: {
4956 		struct fileglob *fg;
4957 		int fl_type;
4958 		int expcounts;
4959 
4960 		if (fp->f_type != DTYPE_VNODE) {
4961 			error = EBADF;
4962 			goto out;
4963 		}
4964 		vp = (struct vnode *)fp_get_data(fp);
4965 		fg = fp->fp_glob;;
4966 		proc_fdunlock(p);
4967 
4968 		/*
4969 		 * In order to allow a process to avoid breaking
4970 		 * its own leases, the expected open count needs
4971 		 * to be provided to F_SETLEASE when placing write lease.
4972 		 * Similarly, in order to allow a process to place a read lease
4973 		 * after opening the file multiple times in RW mode, the expected
4974 		 * write count needs to be provided to F_SETLEASE when placing a
4975 		 * read lease.
4976 		 *
4977 		 * We use the upper 30 bits of the integer argument (way more than
4978 		 * enough) as the expected open/write count.
4979 		 *
4980 		 * If the caller passed 0 for the expected open count,
4981 		 * assume 1.
4982 		 */
4983 		fl_type = CAST_DOWN_EXPLICIT(int, uap->arg);
4984 		expcounts = (unsigned int)fl_type >> 2;
4985 		fl_type &= 3;
4986 
4987 		if (fl_type == F_WRLCK && expcounts == 0) {
4988 			expcounts = 1;
4989 		}
4990 
4991 		AUDIT_ARG(value32, fl_type);
4992 
4993 		if ((error = vnode_getwithref(vp))) {
4994 			goto outdrop;
4995 		}
4996 
4997 		/*
4998 		 * Only support for regular file/dir mounted on local-based filesystem.
4999 		 */
5000 		if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5001 		    !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
5002 			error = EBADF;
5003 			vnode_put(vp);
5004 			goto outdrop;
5005 		}
5006 
5007 		/* For directory, we only support read lease. */
5008 		if (vnode_vtype(vp) == VDIR && fl_type == F_WRLCK) {
5009 			error = ENOTSUP;
5010 			vnode_put(vp);
5011 			goto outdrop;
5012 		}
5013 
5014 		switch (fl_type) {
5015 		case F_RDLCK:
5016 		case F_WRLCK:
5017 		case F_UNLCK:
5018 			error = vnode_setlease(vp, fg, fl_type, expcounts,
5019 			    vfs_context_current());
5020 			break;
5021 		default:
5022 			error = EINVAL;
5023 			break;
5024 		}
5025 
5026 		vnode_put(vp);
5027 		goto outdrop;
5028 	}
5029 
5030 	case F_GETLEASE: {
5031 		if (fp->f_type != DTYPE_VNODE) {
5032 			error = EBADF;
5033 			goto out;
5034 		}
5035 		vp = (struct vnode *)fp_get_data(fp);
5036 		proc_fdunlock(p);
5037 
5038 		if ((error = vnode_getwithref(vp))) {
5039 			goto outdrop;
5040 		}
5041 
5042 		if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5043 		    !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
5044 			error = EBADF;
5045 			vnode_put(vp);
5046 			goto outdrop;
5047 		}
5048 
5049 		error = 0;
5050 		*retval = vnode_getlease(vp);
5051 		vnode_put(vp);
5052 		goto outdrop;
5053 	}
5054 #endif /* CONFIG_FILE_LEASES */
5055 
5056 	/* SPI (private) for asserting background access to a file */
5057 	case F_ASSERT_BG_ACCESS:
5058 	/* SPI (private) for releasing background access to a file */
5059 	case F_RELEASE_BG_ACCESS: {
5060 		/*
5061 		 * Check if the process is platform code, which means
5062 		 * that it is considered part of the Operating System.
5063 		 */
5064 		if (!csproc_get_platform_binary(p)) {
5065 			error = EPERM;
5066 			goto out;
5067 		}
5068 
5069 		if (fp->f_type != DTYPE_VNODE) {
5070 			error = EBADF;
5071 			goto out;
5072 		}
5073 
5074 		vp = (struct vnode *)fp_get_data(fp);
5075 		proc_fdunlock(p);
5076 
5077 		if (vnode_getwithref(vp)) {
5078 			error = ENOENT;
5079 			goto outdrop;
5080 		}
5081 
5082 		/* Verify that vp points to a file and not a directory */
5083 		if (!vnode_isreg(vp)) {
5084 			vnode_put(vp);
5085 			error = EINVAL;
5086 			goto outdrop;
5087 		}
5088 
5089 		/* Only proceed if you have write access */
5090 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
5091 			vnode_put(vp);
5092 			error = EBADF;
5093 			goto outdrop;
5094 		}
5095 
5096 		if (cmd == F_ASSERT_BG_ACCESS) {
5097 			fassertbgaccess_t args;
5098 
5099 			if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
5100 				vnode_put(vp);
5101 				goto outdrop;
5102 			}
5103 
5104 			error = VNOP_IOCTL(vp, F_ASSERT_BG_ACCESS, (caddr_t)&args, 0, &context);
5105 		} else {
5106 			// cmd == F_RELEASE_BG_ACCESS
5107 			error = VNOP_IOCTL(vp, F_RELEASE_BG_ACCESS, (caddr_t)NULL, 0, &context);
5108 		}
5109 
5110 		vnode_put(vp);
5111 
5112 		goto outdrop;
5113 	}
5114 
5115 	default:
5116 		/*
5117 		 * This is an fcntl() that we do not recognize at this level;
5118 		 * if this is a vnode, we send it down into the VNOP_IOCTL
5119 		 * for this vnode; this can include special devices, and will
5120 		 * effectively overload fcntl() to send ioctl()'s.
5121 		 */
5122 		if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
5123 			error = EINVAL;
5124 			goto out;
5125 		}
5126 
5127 		/* Catch any now-invalid fcntl() selectors */
5128 		switch (cmd) {
5129 		case (int)APFSIOC_REVERT_TO_SNAPSHOT:
5130 		case (int)FSIOC_FIOSEEKHOLE:
5131 		case (int)FSIOC_FIOSEEKDATA:
5132 		case (int)FSIOC_CAS_BSDFLAGS:
5133 		case (int)FSIOC_AUTH_FS:
5134 		case HFS_GET_BOOT_INFO:
5135 		case HFS_SET_BOOT_INFO:
5136 		case FIOPINSWAP:
5137 		case F_MARKDEPENDENCY:
5138 		case TIOCREVOKE:
5139 		case TIOCREVOKECLEAR:
5140 			error = EINVAL;
5141 			goto out;
5142 		default:
5143 			break;
5144 		}
5145 
5146 		if (fp->f_type != DTYPE_VNODE) {
5147 			error = EBADF;
5148 			goto out;
5149 		}
5150 		vp = (struct vnode *)fp_get_data(fp);
5151 		proc_fdunlock(p);
5152 
5153 		if ((error = vnode_getwithref(vp)) == 0) {
5154 #define STK_PARAMS 128
5155 			char stkbuf[STK_PARAMS] = {0};
5156 			unsigned int size;
5157 			caddr_t data, memp;
5158 			/*
5159 			 * For this to work properly, we have to copy in the
5160 			 * ioctl() cmd argument if there is one; we must also
5161 			 * check that a command parameter, if present, does
5162 			 * not exceed the maximum command length dictated by
5163 			 * the number of bits we have available in the command
5164 			 * to represent a structure length.  Finally, we have
5165 			 * to copy the results back out, if it is that type of
5166 			 * ioctl().
5167 			 */
5168 			size = IOCPARM_LEN(cmd);
5169 			if (size > IOCPARM_MAX) {
5170 				(void)vnode_put(vp);
5171 				error = EINVAL;
5172 				break;
5173 			}
5174 
5175 			memp = NULL;
5176 			if (size > sizeof(stkbuf)) {
5177 				memp = (caddr_t)kalloc_data(size, Z_WAITOK);
5178 				if (memp == 0) {
5179 					(void)vnode_put(vp);
5180 					error = ENOMEM;
5181 					goto outdrop;
5182 				}
5183 				data = memp;
5184 			} else {
5185 				data = &stkbuf[0];
5186 			}
5187 
5188 			if (cmd & IOC_IN) {
5189 				if (size) {
5190 					/* structure */
5191 					error = copyin(argp, data, size);
5192 					if (error) {
5193 						(void)vnode_put(vp);
5194 						if (memp) {
5195 							kfree_data(memp, size);
5196 						}
5197 						goto outdrop;
5198 					}
5199 
5200 					/* Bzero the section beyond that which was needed */
5201 					if (size <= sizeof(stkbuf)) {
5202 						bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
5203 					}
5204 				} else {
5205 					/* int */
5206 					if (is64bit) {
5207 						*(user_addr_t *)data = argp;
5208 					} else {
5209 						*(uint32_t *)data = (uint32_t)argp;
5210 					}
5211 				};
5212 			} else if ((cmd & IOC_OUT) && size) {
5213 				/*
5214 				 * Zero the buffer so the user always
5215 				 * gets back something deterministic.
5216 				 */
5217 				bzero(data, size);
5218 			} else if (cmd & IOC_VOID) {
5219 				if (is64bit) {
5220 					*(user_addr_t *)data = argp;
5221 				} else {
5222 					*(uint32_t *)data = (uint32_t)argp;
5223 				}
5224 			}
5225 
5226 			error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
5227 
5228 			(void)vnode_put(vp);
5229 
5230 			/* Copy any output data to user */
5231 			if (error == 0 && (cmd & IOC_OUT) && size) {
5232 				error = copyout(data, argp, size);
5233 			}
5234 			if (memp) {
5235 				kfree_data(memp, size);
5236 			}
5237 		}
5238 		break;
5239 	}
5240 
5241 outdrop:
5242 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
5243 
5244 out:
5245 	return sys_fcntl_out(p, fd, fp, error);
5246 }
5247 
5248 
5249 /*
5250  * sys_close
5251  *
5252  * Description:	The implementation of the close(2) system call
5253  *
5254  * Parameters:	p			Process in whose per process file table
5255  *					the close is to occur
5256  *		uap->fd			fd to be closed
5257  *		retval			<unused>
5258  *
5259  * Returns:	0			Success
5260  *	fp_lookup:EBADF			Bad file descriptor
5261  *      fp_guard_exception:???          Guarded file descriptor
5262  *	close_internal:EBADF
5263  *	close_internal:???              Anything returnable by a per-fileops
5264  *					close function
5265  */
5266 int
sys_close(proc_t p,struct close_args * uap,__unused int32_t * retval)5267 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
5268 {
5269 	__pthread_testcancel(1);
5270 	return close_nocancel(p, uap->fd);
5271 }
5272 
5273 int
sys_close_nocancel(proc_t p,struct close_nocancel_args * uap,__unused int32_t * retval)5274 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
5275 {
5276 	return close_nocancel(p, uap->fd);
5277 }
5278 
5279 int
close_nocancel(proc_t p,int fd)5280 close_nocancel(proc_t p, int fd)
5281 {
5282 	struct fileproc *fp;
5283 
5284 	AUDIT_SYSCLOSE(p, fd);
5285 
5286 	proc_fdlock(p);
5287 	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
5288 		proc_fdunlock(p);
5289 		return EBADF;
5290 	}
5291 
5292 	if (fp_isguarded(fp, GUARD_CLOSE)) {
5293 		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
5294 		proc_fdunlock(p);
5295 		return error;
5296 	}
5297 
5298 	return fp_close_and_unlock(p, fd, fp, 0);
5299 }
5300 
5301 
5302 /*
5303  * fstat
5304  *
5305  * Description:	Return status information about a file descriptor.
5306  *
5307  * Parameters:	p				The process doing the fstat
5308  *		fd				The fd to stat
5309  *		ub				The user stat buffer
5310  *		xsecurity			The user extended security
5311  *						buffer, or 0 if none
5312  *		xsecurity_size			The size of xsecurity, or 0
5313  *						if no xsecurity
5314  *		isstat64			Flag to indicate 64 bit version
5315  *						for inode size, etc.
5316  *
5317  * Returns:	0				Success
5318  *		EBADF
5319  *		EFAULT
5320  *	fp_lookup:EBADF				Bad file descriptor
5321  *	vnode_getwithref:???
5322  *	copyout:EFAULT
5323  *	vnode_getwithref:???
5324  *	vn_stat:???
5325  *	soo_stat:???
5326  *	pipe_stat:???
5327  *	pshm_stat:???
5328  *	kqueue_stat:???
5329  *
5330  * Notes:	Internal implementation for all other fstat() related
5331  *		functions
5332  *
5333  *		XXX switch on node type is bogus; need a stat in struct
5334  *		XXX fileops instead.
5335  */
5336 static int
fstat(proc_t p,int fd,user_addr_t ub,user_addr_t xsecurity,user_addr_t xsecurity_size,int isstat64)5337 fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
5338     user_addr_t xsecurity_size, int isstat64)
5339 {
5340 	struct fileproc *fp;
5341 	union {
5342 		struct stat sb;
5343 		struct stat64 sb64;
5344 	} source;
5345 	union {
5346 		struct user64_stat user64_sb;
5347 		struct user32_stat user32_sb;
5348 		struct user64_stat64 user64_sb64;
5349 		struct user32_stat64 user32_sb64;
5350 	} dest;
5351 	int error, my_size;
5352 	file_type_t type;
5353 	caddr_t data;
5354 	kauth_filesec_t fsec;
5355 	user_size_t xsecurity_bufsize;
5356 	vfs_context_t ctx = vfs_context_current();
5357 	void * sbptr;
5358 
5359 
5360 	AUDIT_ARG(fd, fd);
5361 
5362 	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
5363 		return error;
5364 	}
5365 	type = fp->f_type;
5366 	data = (caddr_t)fp_get_data(fp);
5367 	fsec = KAUTH_FILESEC_NONE;
5368 
5369 	sbptr = (void *)&source;
5370 
5371 	switch (type) {
5372 	case DTYPE_VNODE:
5373 		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
5374 			/*
5375 			 * If the caller has the file open, and is not
5376 			 * requesting extended security information, we are
5377 			 * going to let them get the basic stat information.
5378 			 */
5379 			if (xsecurity == USER_ADDR_NULL) {
5380 				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
5381 				    fp->fp_glob->fg_cred);
5382 			} else {
5383 				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
5384 			}
5385 
5386 			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
5387 			(void)vnode_put((vnode_t)data);
5388 		}
5389 		break;
5390 
5391 #if SOCKETS
5392 	case DTYPE_SOCKET:
5393 		error = soo_stat((struct socket *)data, sbptr, isstat64);
5394 		break;
5395 #endif /* SOCKETS */
5396 
5397 	case DTYPE_PIPE:
5398 		error = pipe_stat((void *)data, sbptr, isstat64);
5399 		break;
5400 
5401 	case DTYPE_PSXSHM:
5402 		error = pshm_stat((void *)data, sbptr, isstat64);
5403 		break;
5404 
5405 	case DTYPE_KQUEUE:
5406 		error = kqueue_stat((void *)data, sbptr, isstat64, p);
5407 		break;
5408 
5409 	default:
5410 		error = EBADF;
5411 		goto out;
5412 	}
5413 	if (error == 0) {
5414 		caddr_t sbp;
5415 
5416 		if (isstat64 != 0) {
5417 			source.sb64.st_lspare = 0;
5418 			source.sb64.st_qspare[0] = 0LL;
5419 			source.sb64.st_qspare[1] = 0LL;
5420 
5421 			if (IS_64BIT_PROCESS(p)) {
5422 				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5423 				my_size = sizeof(dest.user64_sb64);
5424 				sbp = (caddr_t)&dest.user64_sb64;
5425 			} else {
5426 				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5427 				my_size = sizeof(dest.user32_sb64);
5428 				sbp = (caddr_t)&dest.user32_sb64;
5429 			}
5430 		} else {
5431 			source.sb.st_lspare = 0;
5432 			source.sb.st_qspare[0] = 0LL;
5433 			source.sb.st_qspare[1] = 0LL;
5434 			if (IS_64BIT_PROCESS(p)) {
5435 				munge_user64_stat(&source.sb, &dest.user64_sb);
5436 				my_size = sizeof(dest.user64_sb);
5437 				sbp = (caddr_t)&dest.user64_sb;
5438 			} else {
5439 				munge_user32_stat(&source.sb, &dest.user32_sb);
5440 				my_size = sizeof(dest.user32_sb);
5441 				sbp = (caddr_t)&dest.user32_sb;
5442 			}
5443 		}
5444 
5445 		error = copyout(sbp, ub, my_size);
5446 	}
5447 
5448 	/* caller wants extended security information? */
5449 	if (xsecurity != USER_ADDR_NULL) {
5450 		/* did we get any? */
5451 		if (fsec == KAUTH_FILESEC_NONE) {
5452 			if (susize(xsecurity_size, 0) != 0) {
5453 				error = EFAULT;
5454 				goto out;
5455 			}
5456 		} else {
5457 			/* find the user buffer size */
5458 			xsecurity_bufsize = fusize(xsecurity_size);
5459 
5460 			/* copy out the actual data size */
5461 			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5462 				error = EFAULT;
5463 				goto out;
5464 			}
5465 
5466 			/* if the caller supplied enough room, copy out to it */
5467 			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
5468 				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5469 			}
5470 		}
5471 	}
5472 out:
5473 	fp_drop(p, fd, fp, 0);
5474 	if (fsec != NULL) {
5475 		kauth_filesec_free(fsec);
5476 	}
5477 	return error;
5478 }
5479 
5480 
5481 /*
5482  * sys_fstat_extended
5483  *
5484  * Description:	Extended version of fstat supporting returning extended
5485  *		security information
5486  *
5487  * Parameters:	p				The process doing the fstat
5488  *		uap->fd				The fd to stat
5489  *		uap->ub				The user stat buffer
5490  *		uap->xsecurity			The user extended security
5491  *						buffer, or 0 if none
5492  *		uap->xsecurity_size		The size of xsecurity, or 0
5493  *
5494  * Returns:	0				Success
5495  *		!0				Errno (see fstat)
5496  */
5497 int
sys_fstat_extended(proc_t p,struct fstat_extended_args * uap,__unused int32_t * retval)5498 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
5499 {
5500 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
5501 }
5502 
5503 
5504 /*
5505  * sys_fstat
5506  *
5507  * Description:	Get file status for the file associated with fd
5508  *
5509  * Parameters:	p				The process doing the fstat
5510  *		uap->fd				The fd to stat
5511  *		uap->ub				The user stat buffer
5512  *
5513  * Returns:	0				Success
5514  *		!0				Errno (see fstat)
5515  */
5516 int
sys_fstat(proc_t p,struct fstat_args * uap,__unused int32_t * retval)5517 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
5518 {
5519 	return fstat(p, uap->fd, uap->ub, 0, 0, 0);
5520 }
5521 
5522 
5523 /*
5524  * sys_fstat64_extended
5525  *
5526  * Description:	Extended version of fstat64 supporting returning extended
5527  *		security information
5528  *
5529  * Parameters:	p				The process doing the fstat
5530  *		uap->fd				The fd to stat
5531  *		uap->ub				The user stat buffer
5532  *		uap->xsecurity			The user extended security
5533  *						buffer, or 0 if none
5534  *		uap->xsecurity_size		The size of xsecurity, or 0
5535  *
5536  * Returns:	0				Success
5537  *		!0				Errno (see fstat)
5538  */
5539 int
sys_fstat64_extended(proc_t p,struct fstat64_extended_args * uap,__unused int32_t * retval)5540 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
5541 {
5542 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
5543 }
5544 
5545 
5546 /*
5547  * sys_fstat64
5548  *
5549  * Description:	Get 64 bit version of the file status for the file associated
5550  *		with fd
5551  *
5552  * Parameters:	p				The process doing the fstat
5553  *		uap->fd				The fd to stat
5554  *		uap->ub				The user stat buffer
5555  *
5556  * Returns:	0				Success
5557  *		!0				Errno (see fstat)
5558  */
5559 int
sys_fstat64(proc_t p,struct fstat64_args * uap,__unused int32_t * retval)5560 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
5561 {
5562 	return fstat(p, uap->fd, uap->ub, 0, 0, 1);
5563 }
5564 
5565 
5566 /*
5567  * sys_fpathconf
5568  *
5569  * Description:	Return pathconf information about a file descriptor.
5570  *
5571  * Parameters:	p				Process making the request
5572  *		uap->fd				fd to get information about
5573  *		uap->name			Name of information desired
5574  *		retval				Pointer to the call return area
5575  *
5576  * Returns:	0				Success
5577  *		EINVAL
5578  *	fp_lookup:EBADF				Bad file descriptor
5579  *	vnode_getwithref:???
5580  *	vn_pathconf:???
5581  *
5582  * Implicit returns:
5583  *		*retval (modified)		Returned information (numeric)
5584  */
5585 int
sys_fpathconf(proc_t p,struct fpathconf_args * uap,int32_t * retval)5586 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5587 {
5588 	int fd = uap->fd;
5589 	struct fileproc *fp;
5590 	struct vnode *vp;
5591 	int error = 0;
5592 	file_type_t type;
5593 
5594 
5595 	AUDIT_ARG(fd, uap->fd);
5596 	if ((error = fp_lookup(p, fd, &fp, 0))) {
5597 		return error;
5598 	}
5599 	type = fp->f_type;
5600 
5601 	switch (type) {
5602 	case DTYPE_SOCKET:
5603 		if (uap->name != _PC_PIPE_BUF) {
5604 			error = EINVAL;
5605 			goto out;
5606 		}
5607 		*retval = PIPE_BUF;
5608 		error = 0;
5609 		goto out;
5610 
5611 	case DTYPE_PIPE:
5612 		if (uap->name != _PC_PIPE_BUF) {
5613 			error = EINVAL;
5614 			goto out;
5615 		}
5616 		*retval = PIPE_BUF;
5617 		error = 0;
5618 		goto out;
5619 
5620 	case DTYPE_VNODE:
5621 		vp = (struct vnode *)fp_get_data(fp);
5622 
5623 		if ((error = vnode_getwithref(vp)) == 0) {
5624 			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5625 
5626 			error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5627 
5628 			(void)vnode_put(vp);
5629 		}
5630 		goto out;
5631 
5632 	default:
5633 		error = EINVAL;
5634 		goto out;
5635 	}
5636 	/*NOTREACHED*/
5637 out:
5638 	fp_drop(p, fd, fp, 0);
5639 	return error;
5640 }
5641 
5642 /*
5643  * sys_flock
5644  *
5645  * Description:	Apply an advisory lock on a file descriptor.
5646  *
5647  * Parameters:	p				Process making request
5648  *		uap->fd				fd on which the lock is to be
5649  *						attempted
5650  *		uap->how			(Un)Lock bits, including type
5651  *		retval				Pointer to the call return area
5652  *
5653  * Returns:	0				Success
5654  *	fp_getfvp:EBADF				Bad file descriptor
5655  *	fp_getfvp:ENOTSUP			fd does not refer to a vnode
5656  *	vnode_getwithref:???
5657  *	VNOP_ADVLOCK:???
5658  *
5659  * Implicit returns:
5660  *		*retval (modified)		Size of dtable
5661  *
5662  * Notes:	Just attempt to get a record lock of the requested type on
5663  *		the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5664  */
5665 int
sys_flock(proc_t p,struct flock_args * uap,__unused int32_t * retval)5666 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5667 {
5668 	int fd = uap->fd;
5669 	int how = uap->how;
5670 	struct fileproc *fp;
5671 	struct vnode *vp;
5672 	struct flock lf;
5673 	vfs_context_t ctx = vfs_context_current();
5674 	int error = 0;
5675 
5676 	AUDIT_ARG(fd, uap->fd);
5677 	if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5678 		return error;
5679 	}
5680 	if ((error = vnode_getwithref(vp))) {
5681 		goto out1;
5682 	}
5683 	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5684 
5685 	lf.l_whence = SEEK_SET;
5686 	lf.l_start = 0;
5687 	lf.l_len = 0;
5688 	if (how & LOCK_UN) {
5689 		lf.l_type = F_UNLCK;
5690 		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5691 		goto out;
5692 	}
5693 	if (how & LOCK_EX) {
5694 		lf.l_type = F_WRLCK;
5695 	} else if (how & LOCK_SH) {
5696 		lf.l_type = F_RDLCK;
5697 	} else {
5698 		error = EBADF;
5699 		goto out;
5700 	}
5701 #if CONFIG_MACF
5702 	error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
5703 	if (error) {
5704 		goto out;
5705 	}
5706 #endif
5707 	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5708 	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5709 	    ctx, NULL);
5710 	if (!error) {
5711 		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5712 	}
5713 out:
5714 	(void)vnode_put(vp);
5715 out1:
5716 	fp_drop(p, fd, fp, 0);
5717 	return error;
5718 }
5719 
5720 /*
5721  * sys_fileport_makeport
5722  *
5723  * Description: Obtain a Mach send right for a given file descriptor.
5724  *
5725  * Parameters:	p		Process calling fileport
5726  *              uap->fd		The fd to reference
5727  *              uap->portnamep  User address at which to place port name.
5728  *
5729  * Returns:	0		Success.
5730  *              EBADF		Bad file descriptor.
5731  *              EINVAL		File descriptor had type that cannot be sent, misc. other errors.
5732  *              EFAULT		Address at which to store port name is not valid.
5733  *              EAGAIN		Resource shortage.
5734  *
5735  * Implicit returns:
5736  *		On success, name of send right is stored at user-specified address.
5737  */
5738 int
sys_fileport_makeport(proc_t p,struct fileport_makeport_args * uap,__unused int * retval)5739 sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5740     __unused int *retval)
5741 {
5742 	int err;
5743 	int fd = uap->fd;
5744 	user_addr_t user_portaddr = uap->portnamep;
5745 	struct fileproc *fp = FILEPROC_NULL;
5746 	struct fileglob *fg = NULL;
5747 	ipc_port_t fileport;
5748 	mach_port_name_t name = MACH_PORT_NULL;
5749 
5750 	proc_fdlock(p);
5751 	err = fp_lookup(p, fd, &fp, 1);
5752 	if (err != 0) {
5753 		goto out_unlock;
5754 	}
5755 
5756 	fg = fp->fp_glob;
5757 	if (!fg_sendable(fg)) {
5758 		err = EINVAL;
5759 		goto out_unlock;
5760 	}
5761 
5762 	if (fp_isguarded(fp, GUARD_FILEPORT)) {
5763 		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5764 		goto out_unlock;
5765 	}
5766 
5767 	/* Dropped when port is deallocated */
5768 	fg_ref(p, fg);
5769 
5770 	proc_fdunlock(p);
5771 
5772 	/* Allocate and initialize a port */
5773 	fileport = fileport_alloc(fg);
5774 	if (fileport == IPC_PORT_NULL) {
5775 		fg_drop_live(fg);
5776 		err = EAGAIN;
5777 		goto out;
5778 	}
5779 
5780 	/* Add an entry.  Deallocates port on failure. */
5781 	name = ipc_port_copyout_send(fileport, get_task_ipcspace(proc_task(p)));
5782 	if (!MACH_PORT_VALID(name)) {
5783 		err = EINVAL;
5784 		goto out;
5785 	}
5786 
5787 	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5788 	if (err != 0) {
5789 		goto out;
5790 	}
5791 
5792 	/* Tag the fileglob for debugging purposes */
5793 	lck_mtx_lock_spin(&fg->fg_lock);
5794 	fg->fg_lflags |= FG_PORTMADE;
5795 	lck_mtx_unlock(&fg->fg_lock);
5796 
5797 	fp_drop(p, fd, fp, 0);
5798 
5799 	return 0;
5800 
5801 out_unlock:
5802 	proc_fdunlock(p);
5803 out:
5804 	if (MACH_PORT_VALID(name)) {
5805 		/* Don't care if another thread races us to deallocate the entry */
5806 		(void) mach_port_deallocate(get_task_ipcspace(proc_task(p)), name);
5807 	}
5808 
5809 	if (fp != FILEPROC_NULL) {
5810 		fp_drop(p, fd, fp, 0);
5811 	}
5812 
5813 	return err;
5814 }
5815 
5816 void
fileport_releasefg(struct fileglob * fg)5817 fileport_releasefg(struct fileglob *fg)
5818 {
5819 	(void)fg_drop(PROC_NULL, fg);
5820 }
5821 
5822 /*
5823  * fileport_makefd
5824  *
5825  * Description: Obtain the file descriptor for a given Mach send right.
5826  *
5827  * Returns:	0		Success
5828  *		EINVAL		Invalid Mach port name, or port is not for a file.
5829  *	fdalloc:EMFILE
5830  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5831  *
5832  * Implicit returns:
5833  *		*retval (modified)		The new descriptor
5834  */
5835 int
fileport_makefd(proc_t p,ipc_port_t port,fileproc_flags_t fp_flags,int * retval)5836 fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
5837 {
5838 	struct fileglob *fg;
5839 	struct fileproc *fp = FILEPROC_NULL;
5840 	int fd;
5841 	int err;
5842 
5843 	fg = fileport_port_to_fileglob(port);
5844 	if (fg == NULL) {
5845 		err = EINVAL;
5846 		goto out;
5847 	}
5848 
5849 	fp = fileproc_alloc_init();
5850 
5851 	proc_fdlock(p);
5852 	err = fdalloc(p, 0, &fd);
5853 	if (err != 0) {
5854 		proc_fdunlock(p);
5855 		goto out;
5856 	}
5857 	if (fp_flags) {
5858 		fp->fp_flags |= fp_flags;
5859 	}
5860 
5861 	fp->fp_glob = fg;
5862 	fg_ref(p, fg);
5863 
5864 	procfdtbl_releasefd(p, fd, fp);
5865 	proc_fdunlock(p);
5866 
5867 	*retval = fd;
5868 	err = 0;
5869 out:
5870 	if ((fp != NULL) && (0 != err)) {
5871 		fileproc_free(fp);
5872 	}
5873 
5874 	return err;
5875 }
5876 
5877 /*
5878  * sys_fileport_makefd
5879  *
5880  * Description: Obtain the file descriptor for a given Mach send right.
5881  *
5882  * Parameters:	p		Process calling fileport
5883  *              uap->port	Name of send right to file port.
5884  *
5885  * Returns:	0		Success
5886  *		EINVAL		Invalid Mach port name, or port is not for a file.
5887  *	fdalloc:EMFILE
5888  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5889  *
5890  * Implicit returns:
5891  *		*retval (modified)		The new descriptor
5892  */
5893 int
sys_fileport_makefd(proc_t p,struct fileport_makefd_args * uap,int32_t * retval)5894 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5895 {
5896 	ipc_port_t port = IPC_PORT_NULL;
5897 	mach_port_name_t send = uap->port;
5898 	kern_return_t res;
5899 	int err;
5900 
5901 	res = ipc_object_copyin(get_task_ipcspace(proc_task(p)),
5902 	    send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5903 
5904 	if (res == KERN_SUCCESS) {
5905 		err = fileport_makefd(p, port, FP_CLOEXEC, retval);
5906 	} else {
5907 		err = EINVAL;
5908 	}
5909 
5910 	if (IPC_PORT_NULL != port) {
5911 		ipc_port_release_send(port);
5912 	}
5913 
5914 	return err;
5915 }
5916 
5917 
5918 #pragma mark fileops wrappers
5919 
5920 /*
5921  * fo_read
5922  *
5923  * Description:	Generic fileops read indirected through the fileops pointer
5924  *		in the fileproc structure
5925  *
5926  * Parameters:	fp				fileproc structure pointer
5927  *		uio				user I/O structure pointer
5928  *		flags				FOF_ flags
5929  *		ctx				VFS context for operation
5930  *
5931  * Returns:	0				Success
5932  *		!0				Errno from read
5933  */
5934 int
fo_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5935 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5936 {
5937 	return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5938 }
5939 
5940 int
fo_no_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5941 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5942 {
5943 #pragma unused(fp, uio, flags, ctx)
5944 	return ENXIO;
5945 }
5946 
5947 
5948 /*
5949  * fo_write
5950  *
5951  * Description:	Generic fileops write indirected through the fileops pointer
5952  *		in the fileproc structure
5953  *
5954  * Parameters:	fp				fileproc structure pointer
5955  *		uio				user I/O structure pointer
5956  *		flags				FOF_ flags
5957  *		ctx				VFS context for operation
5958  *
5959  * Returns:	0				Success
5960  *		!0				Errno from write
5961  */
5962 int
fo_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5963 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5964 {
5965 	return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5966 }
5967 
5968 int
fo_no_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5969 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5970 {
5971 #pragma unused(fp, uio, flags, ctx)
5972 	return ENXIO;
5973 }
5974 
5975 
5976 /*
5977  * fo_ioctl
5978  *
5979  * Description:	Generic fileops ioctl indirected through the fileops pointer
5980  *		in the fileproc structure
5981  *
5982  * Parameters:	fp				fileproc structure pointer
5983  *		com				ioctl command
5984  *		data				pointer to internalized copy
5985  *						of user space ioctl command
5986  *						parameter data in kernel space
5987  *		ctx				VFS context for operation
5988  *
5989  * Returns:	0				Success
5990  *		!0				Errno from ioctl
5991  *
5992  * Locks:	The caller is assumed to have held the proc_fdlock; this
5993  *		function releases and reacquires this lock.  If the caller
5994  *		accesses data protected by this lock prior to calling this
5995  *		function, it will need to revalidate/reacquire any cached
5996  *		protected data obtained prior to the call.
5997  */
5998 int
fo_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5999 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6000 {
6001 	int error;
6002 
6003 	proc_fdunlock(vfs_context_proc(ctx));
6004 	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
6005 	proc_fdlock(vfs_context_proc(ctx));
6006 	return error;
6007 }
6008 
6009 int
fo_no_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)6010 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6011 {
6012 #pragma unused(fp, com, data, ctx)
6013 	return ENOTTY;
6014 }
6015 
6016 
6017 /*
6018  * fo_select
6019  *
6020  * Description:	Generic fileops select indirected through the fileops pointer
6021  *		in the fileproc structure
6022  *
6023  * Parameters:	fp				fileproc structure pointer
6024  *		which				select which
6025  *		wql				pointer to wait queue list
6026  *		ctx				VFS context for operation
6027  *
6028  * Returns:	0				Success
6029  *		!0				Errno from select
6030  */
6031 int
fo_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)6032 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6033 {
6034 	return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
6035 }
6036 
6037 int
fo_no_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)6038 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6039 {
6040 #pragma unused(fp, which, wql, ctx)
6041 	return ENOTSUP;
6042 }
6043 
6044 
6045 /*
6046  * fo_close
6047  *
6048  * Description:	Generic fileops close indirected through the fileops pointer
6049  *		in the fileproc structure
6050  *
6051  * Parameters:	fp				fileproc structure pointer for
6052  *						file to close
6053  *		ctx				VFS context for operation
6054  *
6055  * Returns:	0				Success
6056  *		!0				Errno from close
6057  */
6058 int
fo_close(struct fileglob * fg,vfs_context_t ctx)6059 fo_close(struct fileglob *fg, vfs_context_t ctx)
6060 {
6061 	return (*fg->fg_ops->fo_close)(fg, ctx);
6062 }
6063 
6064 
6065 /*
6066  * fo_drain
6067  *
6068  * Description:	Generic fileops kqueue filter indirected through the fileops
6069  *		pointer in the fileproc structure
6070  *
6071  * Parameters:	fp				fileproc structure pointer
6072  *		ctx				VFS context for operation
6073  *
6074  * Returns:	0				Success
6075  *		!0				errno from drain
6076  */
6077 int
fo_drain(struct fileproc * fp,vfs_context_t ctx)6078 fo_drain(struct fileproc *fp, vfs_context_t ctx)
6079 {
6080 	return (*fp->f_ops->fo_drain)(fp, ctx);
6081 }
6082 
6083 int
fo_no_drain(struct fileproc * fp,vfs_context_t ctx)6084 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
6085 {
6086 #pragma unused(fp, ctx)
6087 	return ENOTSUP;
6088 }
6089 
6090 
6091 /*
6092  * fo_kqfilter
6093  *
6094  * Description:	Generic fileops kqueue filter indirected through the fileops
6095  *		pointer in the fileproc structure
6096  *
6097  * Parameters:	fp				fileproc structure pointer
6098  *		kn				pointer to knote to filter on
6099  *
6100  * Returns:	(kn->kn_flags & EV_ERROR)	error in kn->kn_data
6101  *		0				Filter is not active
6102  *		!0				Filter is active
6103  */
6104 int
fo_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)6105 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6106 {
6107 	return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
6108 }
6109 
/*
 * fo_no_kqfilter
 *
 * Description:	kqueue filter operation for which no filter is available:
 *		records ENOTSUP on the knote via knote_set_error() and
 *		reports the filter as not active
 *
 * Parameters:	fp				Ignored
 *		kn				knote that receives the error
 *		kev				Ignored
 *
 * Returns:	0				Always
 *
 * Notes:	Presumably installed as the fo_kqfilter entry of file types
 *		that cannot be monitored with kqueue -- confirm against the
 *		fileops tables.
 */
int
fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
	(void)fp;
	(void)kev;

	/* the failure is reported through the knote, not the return value */
	knote_set_error(kn, ENOTSUP);

	return 0;
}
6116 }
6117