xref: /xnu-8796.121.2/bsd/kern/kern_descrip.c (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
67  */
68 /*
69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70  * support for mandatory and extensible security protections.  This notice
71  * is included in support of clause 2.2 (b) of the Apple Public License,
72  * Version 2.0.
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110 
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116 
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119 
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124 
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129 
130 #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
131 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
132     mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
133 void ipc_port_release_send(ipc_port_t);
134 
135 void fileport_releasefg(struct fileglob *fg);
136 
137 /* flags for fp_close_and_unlock */
138 #define FD_DUP2RESV 1
139 
140 /* We don't want these exported */
141 
142 __private_extern__
143 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
144 
145 /* Conflict wait queue for when selects collide (opaque type) */
146 extern struct waitq select_conflict_queue;
147 
148 #define f_flag fp_glob->fg_flag
149 #define f_type fp_glob->fg_ops->fo_type
150 #define f_cred fp_glob->fg_cred
151 #define f_ops fp_glob->fg_ops
152 #define f_offset fp_glob->fg_offset
153 
154 ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
155 ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);
156 
157 /*
158  * Descriptor management.
159  */
160 int nfiles;                     /* actual number of open files */
161 /*
162  * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
163  */
164 static const struct fileops uninitops;
165 
166 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
167 static LCK_GRP_DECLARE(file_lck_grp, "file");
168 
169 
170 #pragma mark fileglobs
171 
172 /*!
173  * @function fg_free
174  *
175  * @brief
176  * Free a file structure.
177  */
178 static void
fg_free(struct fileglob * fg)179 fg_free(struct fileglob *fg)
180 {
181 	os_atomic_dec(&nfiles, relaxed);
182 
183 	if (fg->fg_vn_data) {
184 		fg_vn_data_free(fg->fg_vn_data);
185 		fg->fg_vn_data = NULL;
186 	}
187 
188 	kauth_cred_t cred = fg->fg_cred;
189 	if (IS_VALID_CRED(cred)) {
190 		kauth_cred_unref(&cred);
191 		fg->fg_cred = NOCRED;
192 	}
193 	lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
194 
195 #if CONFIG_MACF && CONFIG_VNGUARD
196 	vng_file_label_destroy(fg);
197 #endif
198 	zfree(fg_zone, fg);
199 }
200 
201 OS_ALWAYS_INLINE
202 void
fg_ref(proc_t p,struct fileglob * fg)203 fg_ref(proc_t p, struct fileglob *fg)
204 {
205 #if DEBUG || DEVELOPMENT
206 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
207 #else
208 	(void)p;
209 #endif
210 	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
211 }
212 
213 void
fg_drop_live(struct fileglob * fg)214 fg_drop_live(struct fileglob *fg)
215 {
216 	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
217 }
218 
219 int
fg_drop(proc_t p,struct fileglob * fg)220 fg_drop(proc_t p, struct fileglob *fg)
221 {
222 	struct vnode *vp;
223 	struct vfs_context context;
224 	int error = 0;
225 
226 	if (fg == NULL) {
227 		return 0;
228 	}
229 
230 	/* Set up context with cred stashed in fg */
231 	if (p == current_proc()) {
232 		context.vc_thread = current_thread();
233 	} else {
234 		context.vc_thread = NULL;
235 	}
236 	context.vc_ucred = fg->fg_cred;
237 
238 	/*
239 	 * POSIX record locking dictates that any close releases ALL
240 	 * locks owned by this process.  This is handled by setting
241 	 * a flag in the unlock to free ONLY locks obeying POSIX
242 	 * semantics, and not to free BSD-style file locks.
243 	 * If the descriptor was in a message, POSIX-style locks
244 	 * aren't passed with the descriptor.
245 	 */
246 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
247 	    (p->p_ladvflag & P_LADVLOCK)) {
248 		struct flock lf = {
249 			.l_whence = SEEK_SET,
250 			.l_type = F_UNLCK,
251 		};
252 
253 		vp = (struct vnode *)fg_get_data(fg);
254 		if ((error = vnode_getwithref(vp)) == 0) {
255 			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
256 			(void)vnode_put(vp);
257 		}
258 	}
259 
260 	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
261 		/*
262 		 * Since we ensure that fg->fg_ops is always initialized,
263 		 * it is safe to invoke fo_close on the fg
264 		 */
265 		error = fo_close(fg, &context);
266 
267 		fg_free(fg);
268 	}
269 
270 	return error;
271 }
272 
273 inline
274 void
fg_set_data(struct fileglob * fg,void * fg_data)275 fg_set_data(
276 	struct fileglob *fg,
277 	void *fg_data)
278 {
279 	uintptr_t *store = &fg->fg_data;
280 
281 #if __has_feature(ptrauth_calls)
282 	int type = FILEGLOB_DTYPE(fg);
283 
284 	if (fg_data) {
285 		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
286 		fg_data = ptrauth_sign_unauthenticated(fg_data,
287 		    ptrauth_key_process_independent_data,
288 		    ptrauth_blend_discriminator(store, type));
289 	}
290 #endif // __has_feature(ptrauth_calls)
291 
292 	*store = (uintptr_t)fg_data;
293 }
294 
295 inline
296 void *
fg_get_data_volatile(struct fileglob * fg)297 fg_get_data_volatile(struct fileglob *fg)
298 {
299 	uintptr_t *store = &fg->fg_data;
300 	void *fg_data = (void *)*store;
301 
302 #if __has_feature(ptrauth_calls)
303 	int type = FILEGLOB_DTYPE(fg);
304 
305 	if (fg_data) {
306 		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
307 		fg_data = ptrauth_auth_data(fg_data,
308 		    ptrauth_key_process_independent_data,
309 		    ptrauth_blend_discriminator(store, type));
310 	}
311 #endif // __has_feature(ptrauth_calls)
312 
313 	return fg_data;
314 }
315 
316 static void
fg_transfer_filelocks(proc_t p,struct fileglob * fg,thread_t thread)317 fg_transfer_filelocks(proc_t p, struct fileglob *fg, thread_t thread)
318 {
319 	struct vnode *vp;
320 	struct vfs_context context;
321 	struct proc *old_proc = current_proc();
322 
323 	assert(fg != NULL);
324 
325 	assert(p != old_proc);
326 	context.vc_thread = thread;
327 	context.vc_ucred = fg->fg_cred;
328 
329 	/* Transfer all POSIX Style locks to new proc */
330 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
331 	    (p->p_ladvflag & P_LADVLOCK)) {
332 		struct flock lf = {
333 			.l_whence = SEEK_SET,
334 			.l_start = 0,
335 			.l_len = 0,
336 			.l_type = F_TRANSFER,
337 		};
338 
339 		vp = (struct vnode *)fg_get_data(fg);
340 		if (vnode_getwithref(vp) == 0) {
341 			(void)VNOP_ADVLOCK(vp, (caddr_t)old_proc, F_TRANSFER, &lf, F_POSIX, &context, NULL);
342 			(void)vnode_put(vp);
343 		}
344 	}
345 
346 	/* Transfer all OFD Style locks to new proc */
347 	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
348 	    (fg->fg_lflags & FG_HAS_OFDLOCK)) {
349 		struct flock lf = {
350 			.l_whence = SEEK_SET,
351 			.l_start = 0,
352 			.l_len = 0,
353 			.l_type = F_TRANSFER,
354 		};
355 
356 		vp = (struct vnode *)fg_get_data(fg);
357 		if (vnode_getwithref(vp) == 0) {
358 			(void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_TRANSFER, &lf, F_OFD_LOCK, &context, NULL);
359 			(void)vnode_put(vp);
360 		}
361 	}
362 	return;
363 }
364 
365 bool
fg_sendable(struct fileglob * fg)366 fg_sendable(struct fileglob *fg)
367 {
368 	switch (FILEGLOB_DTYPE(fg)) {
369 	case DTYPE_VNODE:
370 	case DTYPE_SOCKET:
371 	case DTYPE_PIPE:
372 	case DTYPE_PSXSHM:
373 	case DTYPE_NETPOLICY:
374 		return (fg->fg_lflags & FG_CONFINED) == 0;
375 
376 	default:
377 		return false;
378 	}
379 }
380 
381 #pragma mark file descriptor table (static helpers)
382 
383 static void
procfdtbl_reservefd(struct proc * p,int fd)384 procfdtbl_reservefd(struct proc * p, int fd)
385 {
386 	p->p_fd.fd_ofiles[fd] = NULL;
387 	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
388 }
389 
390 void
procfdtbl_releasefd(struct proc * p,int fd,struct fileproc * fp)391 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
392 {
393 	if (fp != NULL) {
394 		p->p_fd.fd_ofiles[fd] = fp;
395 	}
396 	p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
397 	if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
398 		p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
399 		wakeup(&p->p_fd);
400 	}
401 }
402 
403 static void
procfdtbl_waitfd(struct proc * p,int fd)404 procfdtbl_waitfd(struct proc * p, int fd)
405 {
406 	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
407 	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
408 }
409 
410 static void
procfdtbl_clearfd(struct proc * p,int fd)411 procfdtbl_clearfd(struct proc * p, int fd)
412 {
413 	int waiting;
414 
415 	waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
416 	p->p_fd.fd_ofiles[fd] = NULL;
417 	p->p_fd.fd_ofileflags[fd] = 0;
418 	if (waiting == UF_RESVWAIT) {
419 		wakeup(&p->p_fd);
420 	}
421 }
422 
423 /*
424  * fdrelse
425  *
426  * Description:	Inline utility function to free an fd in a filedesc
427  *
428  * Parameters:	fdp				Pointer to filedesc fd lies in
429  *		fd				fd to free
430  *		reserv				fd should be reserved
431  *
432  * Returns:	void
433  *
434  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
435  *		the caller
436  */
437 void
fdrelse(struct proc * p,int fd)438 fdrelse(struct proc * p, int fd)
439 {
440 	struct filedesc *fdp = &p->p_fd;
441 	int nfd = 0;
442 
443 	if (fd < fdp->fd_freefile) {
444 		fdp->fd_freefile = fd;
445 	}
446 #if DIAGNOSTIC
447 	if (fd >= fdp->fd_afterlast) {
448 		panic("fdrelse: fd_afterlast inconsistent");
449 	}
450 #endif
451 	procfdtbl_clearfd(p, fd);
452 
453 	nfd = fdp->fd_afterlast;
454 	while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
455 	    !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
456 		nfd--;
457 	}
458 	fdp->fd_afterlast = nfd;
459 
460 #if CONFIG_PROC_RESOURCE_LIMITS
461 	fdp->fd_nfiles_open--;
462 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
463 }
464 
465 
466 /*
467  * finishdup
468  *
469  * Description:	Common code for dup, dup2, and fcntl(F_DUPFD).
470  *
471  * Parameters:	p				Process performing the dup
472  *		old				The fd to dup
473  *		new				The fd to dup it to
474  *		fp_flags			Flags to augment the new fp
475  *		retval				Pointer to the call return area
476  *
477  * Returns:	0				Success
478  *		EBADF
479  *		ENOMEM
480  *
481  * Implicit returns:
482  *		*retval (modified)		The new descriptor
483  *
484  * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
485  *		the caller
486  *
487  * Notes:	This function may drop and reacquire this lock; it is unsafe
488  *		for a caller to assume that other state protected by the lock
489  *		has not been subsequently changed out from under it.
490  */
491 static int
finishdup(proc_t p,struct filedesc * fdp,int old,int new,fileproc_flags_t fp_flags,int32_t * retval)492 finishdup(proc_t p, struct filedesc *fdp, int old, int new,
493     fileproc_flags_t fp_flags, int32_t *retval)
494 {
495 	struct fileproc *nfp;
496 	struct fileproc *ofp;
497 #if CONFIG_MACF
498 	int error;
499 	kauth_cred_t cred;
500 #endif
501 
502 #if DIAGNOSTIC
503 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
504 #endif
505 	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
506 	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
507 		fdrelse(p, new);
508 		return EBADF;
509 	}
510 
511 #if CONFIG_MACF
512 	cred = kauth_cred_proc_ref(p);
513 	error = mac_file_check_dup(cred, ofp->fp_glob, new);
514 	kauth_cred_unref(&cred);
515 
516 	if (error) {
517 		fdrelse(p, new);
518 		return error;
519 	}
520 #endif
521 
522 	fg_ref(p, ofp->fp_glob);
523 
524 	proc_fdunlock(p);
525 
526 	nfp = fileproc_alloc_init();
527 
528 	if (fp_flags) {
529 		nfp->fp_flags |= fp_flags;
530 	}
531 	nfp->fp_glob = ofp->fp_glob;
532 
533 	proc_fdlock(p);
534 
535 #if DIAGNOSTIC
536 	if (fdp->fd_ofiles[new] != 0) {
537 		panic("finishdup: overwriting fd_ofiles with new %d", new);
538 	}
539 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
540 		panic("finishdup: unreserved fileflags with new %d", new);
541 	}
542 #endif
543 
544 	if (new >= fdp->fd_afterlast) {
545 		fdp->fd_afterlast = new + 1;
546 	}
547 	procfdtbl_releasefd(p, new, nfp);
548 	*retval = new;
549 	return 0;
550 }
551 
552 
553 #pragma mark file descriptor table (exported functions)
554 
555 void
proc_dirs_lock_shared(proc_t p)556 proc_dirs_lock_shared(proc_t p)
557 {
558 	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
559 }
560 
561 void
proc_dirs_unlock_shared(proc_t p)562 proc_dirs_unlock_shared(proc_t p)
563 {
564 	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
565 }
566 
567 void
proc_dirs_lock_exclusive(proc_t p)568 proc_dirs_lock_exclusive(proc_t p)
569 {
570 	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
571 }
572 
573 void
proc_dirs_unlock_exclusive(proc_t p)574 proc_dirs_unlock_exclusive(proc_t p)
575 {
576 	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
577 }
578 
579 /*
580  * proc_fdlock, proc_fdlock_spin
581  *
582  * Description:	Lock to control access to the per process struct fileproc
583  *		and struct filedesc
584  *
585  * Parameters:	p				Process to take the lock on
586  *
587  * Returns:	void
588  *
589  * Notes:	The lock is initialized in forkproc() and destroyed in
590  *		reap_child_process().
591  */
592 void
proc_fdlock(proc_t p)593 proc_fdlock(proc_t p)
594 {
595 	lck_mtx_lock(&p->p_fd.fd_lock);
596 }
597 
598 void
proc_fdlock_spin(proc_t p)599 proc_fdlock_spin(proc_t p)
600 {
601 	lck_mtx_lock_spin(&p->p_fd.fd_lock);
602 }
603 
604 void
proc_fdlock_assert(proc_t p,int assertflags)605 proc_fdlock_assert(proc_t p, int assertflags)
606 {
607 	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
608 }
609 
610 
611 /*
612  * proc_fdunlock
613  *
614  * Description:	Unlock the lock previously locked by a call to proc_fdlock()
615  *
616  * Parameters:	p				Process to drop the lock on
617  *
618  * Returns:	void
619  */
620 void
proc_fdunlock(proc_t p)621 proc_fdunlock(proc_t p)
622 {
623 	lck_mtx_unlock(&p->p_fd.fd_lock);
624 }
625 
626 bool
fdt_available_locked(proc_t p,int n)627 fdt_available_locked(proc_t p, int n)
628 {
629 	struct filedesc *fdp = &p->p_fd;
630 	struct fileproc **fpp;
631 	char *flags;
632 	int i;
633 	int lim = proc_limitgetcur_nofile(p);
634 
635 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
636 		return true;
637 	}
638 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
639 	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
640 	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
641 		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
642 			return true;
643 		}
644 	}
645 	return false;
646 }
647 
648 
649 struct fdt_iterator
fdt_next(proc_t p,int fd,bool only_settled)650 fdt_next(proc_t p, int fd, bool only_settled)
651 {
652 	struct fdt_iterator it;
653 	struct filedesc *fdp = &p->p_fd;
654 	struct fileproc *fp;
655 	int nfds = fdp->fd_afterlast;
656 
657 	while (++fd < nfds) {
658 		fp = fdp->fd_ofiles[fd];
659 		if (fp == NULL || fp->fp_glob == NULL) {
660 			continue;
661 		}
662 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
663 			continue;
664 		}
665 		it.fdti_fd = fd;
666 		it.fdti_fp = fp;
667 		return it;
668 	}
669 
670 	it.fdti_fd = nfds;
671 	it.fdti_fp = NULL;
672 	return it;
673 }
674 
675 struct fdt_iterator
fdt_prev(proc_t p,int fd,bool only_settled)676 fdt_prev(proc_t p, int fd, bool only_settled)
677 {
678 	struct fdt_iterator it;
679 	struct filedesc *fdp = &p->p_fd;
680 	struct fileproc *fp;
681 
682 	while (--fd >= 0) {
683 		fp = fdp->fd_ofiles[fd];
684 		if (fp == NULL || fp->fp_glob == NULL) {
685 			continue;
686 		}
687 		if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
688 			continue;
689 		}
690 		it.fdti_fd = fd;
691 		it.fdti_fp = fp;
692 		return it;
693 	}
694 
695 	it.fdti_fd = -1;
696 	it.fdti_fp = NULL;
697 	return it;
698 }
699 
700 void
fdt_init(proc_t p)701 fdt_init(proc_t p)
702 {
703 	struct filedesc *fdp = &p->p_fd;
704 
705 	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
706 	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
707 	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
708 	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
709 }
710 
711 void
fdt_destroy(proc_t p)712 fdt_destroy(proc_t p)
713 {
714 	struct filedesc *fdp = &p->p_fd;
715 
716 	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
717 	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
718 	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
719 	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
720 }
721 
722 void
fdt_exec(proc_t p,short posix_spawn_flags,thread_t thread,bool in_exec)723 fdt_exec(proc_t p, short posix_spawn_flags, thread_t thread, bool in_exec)
724 {
725 	struct filedesc *fdp = &p->p_fd;
726 	thread_t self = current_thread();
727 	struct uthread *ut = get_bsdthread_info(self);
728 	struct kqworkq *dealloc_kqwq = NULL;
729 
730 	/*
731 	 * If the current thread is bound as a workq/workloop
732 	 * servicing thread, we need to unbind it first.
733 	 */
734 	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
735 		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
736 	}
737 
738 	/*
739 	 * Deallocate the knotes for this process
740 	 * and mark the tables non-existent so
741 	 * subsequent kqueue closes go faster.
742 	 */
743 	knotes_dealloc(p);
744 	assert(fdp->fd_knlistsize == 0);
745 	assert(fdp->fd_knhashmask == 0);
746 
747 	proc_fdlock(p);
748 
749 	/* Set the P_LADVLOCK flag if the flag set on old proc */
750 	if (in_exec && (current_proc()->p_ladvflag & P_LADVLOCK)) {
751 		os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
752 	}
753 
754 	for (int i = fdp->fd_afterlast; i-- > 0;) {
755 		struct fileproc *fp = fdp->fd_ofiles[i];
756 		char *flagp = &fdp->fd_ofileflags[i];
757 		bool inherit_file = true;
758 
759 		if (fp == FILEPROC_NULL) {
760 			continue;
761 		}
762 
763 		/*
764 		 * no file descriptor should be in flux when in exec,
765 		 * because we stopped all other threads
766 		 */
767 		if (*flagp & ~UF_INHERIT) {
768 			panic("file %d/%p in flux during exec of %p", i, fp, p);
769 		}
770 
771 		if (fp->fp_flags & FP_CLOEXEC) {
772 			inherit_file = false;
773 		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
774 		    !(*flagp & UF_INHERIT)) {
775 			/*
776 			 * Reverse the usual semantics of file descriptor
777 			 * inheritance - all of them should be closed
778 			 * except files marked explicitly as "inherit" and
779 			 * not marked close-on-exec.
780 			 */
781 			inherit_file = false;
782 #if CONFIG_MACF
783 		} else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
784 			inherit_file = false;
785 #endif
786 		}
787 
788 		*flagp = 0; /* clear UF_INHERIT */
789 
790 		if (!inherit_file) {
791 			fp_close_and_unlock(p, i, fp, 0);
792 			proc_fdlock(p);
793 		} else if (in_exec) {
794 			/* Transfer F_POSIX style lock to new proc */
795 			proc_fdunlock(p);
796 			fg_transfer_filelocks(p, fp->fp_glob, thread);
797 			proc_fdlock(p);
798 		}
799 	}
800 
801 	/* release the per-process workq kq */
802 	if (fdp->fd_wqkqueue) {
803 		dealloc_kqwq = fdp->fd_wqkqueue;
804 		fdp->fd_wqkqueue = NULL;
805 	}
806 
807 	proc_fdunlock(p);
808 
809 	/* Anything to free? */
810 	if (dealloc_kqwq) {
811 		kqworkq_dealloc(dealloc_kqwq);
812 	}
813 }
814 
815 
816 int
fdt_fork(struct filedesc * newfdp,proc_t p,vnode_t uth_cdir,bool in_exec)817 fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir, bool in_exec)
818 {
819 	struct filedesc *fdp = &p->p_fd;
820 	struct fileproc **ofiles;
821 	char *ofileflags;
822 	int n_files, afterlast, freefile;
823 	vnode_t v_dir;
824 #if CONFIG_PROC_RESOURCE_LIMITS
825 	int fd_nfiles_open = 0;
826 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
827 	proc_fdlock(p);
828 
829 	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
830 	newfdp->fd_cmask = fdp->fd_cmask;
831 #if CONFIG_PROC_RESOURCE_LIMITS
832 	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
833 	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
834 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
835 
836 	/*
837 	 * For both fd_cdir and fd_rdir make sure we get
838 	 * a valid reference... if we can't, than set
839 	 * set the pointer(s) to NULL in the child... this
840 	 * will keep us from using a non-referenced vp
841 	 * and allows us to do the vnode_rele only on
842 	 * a properly referenced vp
843 	 */
844 	if ((v_dir = fdp->fd_rdir)) {
845 		if (vnode_getwithref(v_dir) == 0) {
846 			if (vnode_ref(v_dir) == 0) {
847 				newfdp->fd_rdir = v_dir;
848 			}
849 			vnode_put(v_dir);
850 		}
851 		if (newfdp->fd_rdir == NULL) {
852 			/*
853 			 * We couldn't get a new reference on
854 			 * the chroot directory being
855 			 * inherited... this is fatal, since
856 			 * otherwise it would constitute an
857 			 * escape from a chroot environment by
858 			 * the new process.
859 			 */
860 			proc_fdunlock(p);
861 			return EPERM;
862 		}
863 	}
864 
865 	/*
866 	 * If we are running with per-thread current working directories,
867 	 * inherit the new current working directory from the current thread.
868 	 */
869 	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
870 		if (vnode_getwithref(v_dir) == 0) {
871 			if (vnode_ref(v_dir) == 0) {
872 				newfdp->fd_cdir = v_dir;
873 			}
874 			vnode_put(v_dir);
875 		}
876 		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
877 			/*
878 			 * we couldn't get a new reference on
879 			 * the current working directory being
880 			 * inherited... we might as well drop
881 			 * our reference from the parent also
882 			 * since the vnode has gone DEAD making
883 			 * it useless... by dropping it we'll
884 			 * be that much closer to recycling it
885 			 */
886 			vnode_rele(fdp->fd_cdir);
887 			fdp->fd_cdir = NULL;
888 		}
889 	}
890 
891 	/*
892 	 * If the number of open files fits in the internal arrays
893 	 * of the open file structure, use them, otherwise allocate
894 	 * additional memory for the number of descriptors currently
895 	 * in use.
896 	 */
897 	afterlast = fdp->fd_afterlast;
898 	freefile = fdp->fd_freefile;
899 	if (afterlast <= NDFILE) {
900 		n_files = NDFILE;
901 	} else {
902 		n_files = roundup(afterlast, NDEXTENT);
903 	}
904 
905 	proc_fdunlock(p);
906 
907 	ofiles = kalloc_type(struct fileproc *, n_files, Z_WAITOK | Z_ZERO);
908 	ofileflags = kalloc_data(n_files, Z_WAITOK | Z_ZERO);
909 	if (ofiles == NULL || ofileflags == NULL) {
910 		kfree_type(struct fileproc *, n_files, ofiles);
911 		kfree_data(ofileflags, n_files);
912 		if (newfdp->fd_cdir) {
913 			vnode_rele(newfdp->fd_cdir);
914 			newfdp->fd_cdir = NULL;
915 		}
916 		if (newfdp->fd_rdir) {
917 			vnode_rele(newfdp->fd_rdir);
918 			newfdp->fd_rdir = NULL;
919 		}
920 		return ENOMEM;
921 	}
922 
923 	proc_fdlock(p);
924 
925 	for (int i = afterlast; i-- > 0;) {
926 		struct fileproc *ofp, *nfp;
927 		char flags;
928 
929 		ofp = fdp->fd_ofiles[i];
930 		flags = fdp->fd_ofileflags[i];
931 
932 		if (ofp == NULL ||
933 		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
934 		    ((ofp->fp_flags & FP_CLOFORK) && !in_exec) ||
935 		    ((ofp->fp_flags & FP_CLOEXEC) && in_exec) ||
936 		    (flags & UF_RESERVED)) {
937 			if (i + 1 == afterlast) {
938 				afterlast = i;
939 			}
940 			if (i < freefile) {
941 				freefile = i;
942 			}
943 
944 			continue;
945 		}
946 
947 		nfp = fileproc_alloc_init();
948 		nfp->fp_glob = ofp->fp_glob;
949 		if (in_exec) {
950 			nfp->fp_flags = (ofp->fp_flags & (FP_CLOEXEC | FP_CLOFORK));
951 			if (ofp->fp_guard_attrs) {
952 				guarded_fileproc_copy_guard(ofp, nfp);
953 			}
954 		} else {
955 			assert(ofp->fp_guard_attrs == 0);
956 			nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
957 		}
958 		fg_ref(p, nfp->fp_glob);
959 
960 		ofiles[i] = nfp;
961 #if CONFIG_PROC_RESOURCE_LIMITS
962 		fd_nfiles_open++;
963 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
964 	}
965 
966 	proc_fdunlock(p);
967 
968 	newfdp->fd_ofiles = ofiles;
969 	newfdp->fd_ofileflags = ofileflags;
970 	newfdp->fd_nfiles = n_files;
971 	newfdp->fd_afterlast = afterlast;
972 	newfdp->fd_freefile = freefile;
973 
974 #if CONFIG_PROC_RESOURCE_LIMITS
975 	newfdp->fd_nfiles_open = fd_nfiles_open;
976 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
977 
978 	return 0;
979 }
980 
981 void
fdt_invalidate(proc_t p)982 fdt_invalidate(proc_t p)
983 {
984 	struct filedesc *fdp = &p->p_fd;
985 	struct fileproc *fp, **ofiles;
986 	char *ofileflags;
987 	struct kqworkq *kqwq = NULL;
988 	vnode_t vn1 = NULL, vn2 = NULL;
989 	struct kqwllist *kqhash = NULL;
990 	u_long kqhashmask = 0;
991 	int n_files = 0;
992 
993 	/*
994 	 * deallocate all the knotes up front and claim empty
995 	 * tables to make any subsequent kqueue closes faster.
996 	 */
997 	knotes_dealloc(p);
998 	assert(fdp->fd_knlistsize == 0);
999 	assert(fdp->fd_knhashmask == 0);
1000 
1001 	/*
1002 	 * dealloc all workloops that have outstanding retains
1003 	 * when created with scheduling parameters.
1004 	 */
1005 	kqworkloops_dealloc(p);
1006 
1007 	proc_fdlock(p);
1008 
1009 	/* close file descriptors */
1010 	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
1011 		for (int i = fdp->fd_afterlast; i-- > 0;) {
1012 			if ((fp = fdp->fd_ofiles[i]) != NULL) {
1013 				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
1014 					panic("fdfree: found fp with UF_RESERVED");
1015 				}
1016 				fp_close_and_unlock(p, i, fp, 0);
1017 				proc_fdlock(p);
1018 			}
1019 		}
1020 	}
1021 
1022 	n_files = fdp->fd_nfiles;
1023 	ofileflags = fdp->fd_ofileflags;
1024 	ofiles = fdp->fd_ofiles;
1025 	kqwq = fdp->fd_wqkqueue;
1026 	vn1 = fdp->fd_cdir;
1027 	vn2 = fdp->fd_rdir;
1028 
1029 	fdp->fd_ofileflags = NULL;
1030 	fdp->fd_ofiles = NULL;
1031 	fdp->fd_nfiles = 0;
1032 	fdp->fd_wqkqueue = NULL;
1033 	fdp->fd_cdir = NULL;
1034 	fdp->fd_rdir = NULL;
1035 
1036 	proc_fdunlock(p);
1037 
1038 	lck_mtx_lock(&fdp->fd_knhashlock);
1039 
1040 	kqhash = fdp->fd_kqhash;
1041 	kqhashmask = fdp->fd_kqhashmask;
1042 
1043 	fdp->fd_kqhash = 0;
1044 	fdp->fd_kqhashmask = 0;
1045 
1046 	lck_mtx_unlock(&fdp->fd_knhashlock);
1047 
1048 	kfree_type(struct fileproc *, n_files, ofiles);
1049 	kfree_data(ofileflags, n_files);
1050 
1051 	if (kqwq) {
1052 		kqworkq_dealloc(kqwq);
1053 	}
1054 	if (vn1) {
1055 		vnode_rele(vn1);
1056 	}
1057 	if (vn2) {
1058 		vnode_rele(vn2);
1059 	}
1060 	if (kqhash) {
1061 		for (uint32_t i = 0; i <= kqhashmask; i++) {
1062 			assert(LIST_EMPTY(&kqhash[i]));
1063 		}
1064 		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
1065 	}
1066 }
1067 
1068 
1069 struct fileproc *
fileproc_alloc_init(void)1070 fileproc_alloc_init(void)
1071 {
1072 	struct fileproc *fp;
1073 
1074 	fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1075 	os_ref_init(&fp->fp_iocount, &f_refgrp);
1076 	return fp;
1077 }
1078 
1079 
1080 void
fileproc_free(struct fileproc * fp)1081 fileproc_free(struct fileproc *fp)
1082 {
1083 	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
1084 #if DEVELOPMENT || DEBUG
1085 	if (0 != refc) {
1086 		panic("%s: pid %d refc: %u != 0",
1087 		    __func__, proc_pid(current_proc()), refc);
1088 	}
1089 #endif
1090 	if (fp->fp_guard_attrs) {
1091 		guarded_fileproc_unguard(fp);
1092 	}
1093 	assert(fp->fp_wset == NULL);
1094 	zfree_id(ZONE_ID_FILEPROC, fp);
1095 }
1096 
1097 
/*
 * Statistics counter for the number of times a process calling fdalloc()
 * has resulted in an expansion of the per process open file table.
 *
 * The counter is global: fdalloc() bumps it once for every table
 * expansion it performs, whichever process triggered it.
 *
 * XXX This would likely be of more use if it were per process
 */
int fdexpand;
1105 
1106 #if CONFIG_PROC_RESOURCE_LIMITS
1107 /*
1108  * Should be called only with the proc_fdlock held.
1109  */
1110 void
fd_check_limit_exceeded(struct filedesc * fdp)1111 fd_check_limit_exceeded(struct filedesc *fdp)
1112 {
1113 #if DIAGNOSTIC
1114 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1115 #endif
1116 	if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
1117 	    (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
1118 		fd_above_soft_limit_send_notification(fdp);
1119 		act_set_astproc_resource(current_thread());
1120 	} else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
1121 	    (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
1122 		fd_above_hard_limit_send_notification(fdp);
1123 		act_set_astproc_resource(current_thread());
1124 	}
1125 }
1126 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1127 
1128 /*
1129  * fdalloc
1130  *
1131  * Description:	Allocate a file descriptor for the process.
1132  *
1133  * Parameters:	p				Process to allocate the fd in
1134  *		want				The fd we would prefer to get
1135  *		result				Pointer to fd we got
1136  *
1137  * Returns:	0				Success
1138  *		EMFILE
1139  *		ENOMEM
1140  *
1141  * Implicit returns:
1142  *		*result (modified)		The fd which was allocated
1143  */
1144 int
fdalloc(proc_t p,int want,int * result)1145 fdalloc(proc_t p, int want, int *result)
1146 {
1147 	struct filedesc *fdp = &p->p_fd;
1148 	int i;
1149 	int last, numfiles, oldnfiles;
1150 	struct fileproc **newofiles;
1151 	char *newofileflags;
1152 	int lim = proc_limitgetcur_nofile(p);
1153 
1154 	/*
1155 	 * Search for a free descriptor starting at the higher
1156 	 * of want or fd_freefile.  If that fails, consider
1157 	 * expanding the ofile array.
1158 	 */
1159 #if DIAGNOSTIC
1160 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1161 #endif
1162 
1163 	for (;;) {
1164 		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
1165 		if ((i = want) < fdp->fd_freefile) {
1166 			i = fdp->fd_freefile;
1167 		}
1168 		for (; i < last; i++) {
1169 			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
1170 				procfdtbl_reservefd(p, i);
1171 				if (i >= fdp->fd_afterlast) {
1172 					fdp->fd_afterlast = i + 1;
1173 				}
1174 				if (want <= fdp->fd_freefile) {
1175 					fdp->fd_freefile = i;
1176 				}
1177 				*result = i;
1178 #if CONFIG_PROC_RESOURCE_LIMITS
1179 				fdp->fd_nfiles_open++;
1180 				fd_check_limit_exceeded(fdp);
1181 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1182 				return 0;
1183 			}
1184 		}
1185 
1186 		/*
1187 		 * No space in current array.  Expand?
1188 		 */
1189 		if ((rlim_t)fdp->fd_nfiles >= lim) {
1190 			return EMFILE;
1191 		}
1192 		if (fdp->fd_nfiles < NDEXTENT) {
1193 			numfiles = NDEXTENT;
1194 		} else {
1195 			numfiles = 2 * fdp->fd_nfiles;
1196 		}
1197 		/* Enforce lim */
1198 		if ((rlim_t)numfiles > lim) {
1199 			numfiles = (int)lim;
1200 		}
1201 		proc_fdunlock(p);
1202 		newofiles = kalloc_type(struct fileproc *, numfiles, Z_WAITOK | Z_ZERO);
1203 		newofileflags = kalloc_data(numfiles, Z_WAITOK | Z_ZERO);
1204 		proc_fdlock(p);
1205 		if (newofileflags == NULL || newofiles == NULL) {
1206 			kfree_type(struct fileproc *, numfiles, newofiles);
1207 			kfree_data(newofileflags, numfiles);
1208 			return ENOMEM;
1209 		}
1210 		if (fdp->fd_nfiles >= numfiles) {
1211 			kfree_type(struct fileproc *, numfiles, newofiles);
1212 			kfree_data(newofileflags, numfiles);
1213 			continue;
1214 		}
1215 
1216 		/*
1217 		 * Copy the existing ofile and ofileflags arrays
1218 		 * and zero the new portion of each array.
1219 		 */
1220 		oldnfiles = fdp->fd_nfiles;
1221 		memcpy(newofiles, fdp->fd_ofiles,
1222 		    oldnfiles * sizeof(*fdp->fd_ofiles));
1223 		memcpy(newofileflags, fdp->fd_ofileflags, oldnfiles);
1224 
1225 		kfree_type(struct fileproc *, oldnfiles, fdp->fd_ofiles);
1226 		kfree_data(fdp->fd_ofileflags, oldnfiles);
1227 		fdp->fd_ofiles = newofiles;
1228 		fdp->fd_ofileflags = newofileflags;
1229 		fdp->fd_nfiles = numfiles;
1230 		fdexpand++;
1231 	}
1232 }
1233 
1234 
1235 #pragma mark fileprocs
1236 
1237 void
fileproc_modify_vflags(struct fileproc * fp,fileproc_vflags_t vflags,boolean_t clearflags)1238 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1239 {
1240 	if (clearflags) {
1241 		os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1242 	} else {
1243 		os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1244 	}
1245 }
1246 
1247 fileproc_vflags_t
fileproc_get_vflags(struct fileproc * fp)1248 fileproc_get_vflags(struct fileproc *fp)
1249 {
1250 	return os_atomic_load(&fp->fp_vflags, relaxed);
1251 }
1252 
/*
 * falloc_withinit
 *
 * Create a new open file structure and allocate
 * a file descriptor for the process that refers to it.
 *
 * Description:	Allocate an entry in the per process open file table and
 *		return the corresponding fileproc and fd.
 *
 * Parameters:	p				The process in whose open file
 *						table the fd is to be allocated
 *		resultfp			Pointer to fileproc pointer
 *						return area
 *		resultfd			Pointer to fd return area
 *		ctx				VFS context
 *		fp_init				optional initializer called on
 *						the new fileproc, or NULL
 *		initarg				argument passed to fp_init
 *
 * Returns:	0				Success
 *		ENFILE				Too many open files in system
 *		fdalloc:EMFILE			Too many open files in process
 *		fdalloc:ENOMEM			M_OFILETABL zone exhausted
 *		mac_file_check_create:!0	(CONFIG_MACF) error returned by
 *						the MAC check
 *		ENOMEM				fp_zone or fg_zone zone
 *						exhausted
 *
 * Implicit returns:
 *		*resultfp (modified)		Returned fileproc pointer
 *		*resultfd (modified)		Returned fd
 *
 * Notes:	This function takes separate process and context arguments
 *		solely to support kern_exec.c; otherwise, it would take
 *		neither, and use the vfs_context_current() routine internally.
 */
1288 int
falloc_withinit(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx,fp_initfn_t fp_init,void * initarg)1289 falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
1290     vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
1291 {
1292 	struct fileproc *fp;
1293 	struct fileglob *fg;
1294 	int error, nfd;
1295 #if CONFIG_MACF
1296 	kauth_cred_t cred;
1297 #endif
1298 
1299 	/* Make sure we don't go beyond the system-wide limit */
1300 	if (nfiles >= maxfiles) {
1301 		tablefull("file");
1302 		return ENFILE;
1303 	}
1304 
1305 	proc_fdlock(p);
1306 
1307 	/* fdalloc will make sure the process stays below per-process limit */
1308 	if ((error = fdalloc(p, 0, &nfd))) {
1309 		proc_fdunlock(p);
1310 		return error;
1311 	}
1312 
1313 #if CONFIG_MACF
1314 	cred = kauth_cred_proc_ref(p);
1315 	error = mac_file_check_create(cred);
1316 	kauth_cred_unref(&cred);
1317 	if (error) {
1318 		proc_fdunlock(p);
1319 		return error;
1320 	}
1321 #endif
1322 
1323 	/*
1324 	 * Allocate a new file descriptor.
1325 	 * If the process has file descriptor zero open, add to the list
1326 	 * of open files at that point, otherwise put it at the front of
1327 	 * the list of open files.
1328 	 */
1329 	proc_fdunlock(p);
1330 
1331 	fp = fileproc_alloc_init();
1332 	if (fp_init) {
1333 		fp_init(fp, initarg);
1334 	}
1335 
1336 	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
1337 	lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
1338 
1339 	os_ref_retain_locked(&fp->fp_iocount);
1340 	os_ref_init_raw(&fg->fg_count, &f_refgrp);
1341 	fg->fg_ops = &uninitops;
1342 	fp->fp_glob = fg;
1343 
1344 	kauth_cred_ref(ctx->vc_ucred);
1345 
1346 	fp->f_cred = ctx->vc_ucred;
1347 
1348 	os_atomic_inc(&nfiles, relaxed);
1349 
1350 	proc_fdlock(p);
1351 
1352 	p->p_fd.fd_ofiles[nfd] = fp;
1353 
1354 	proc_fdunlock(p);
1355 
1356 	if (resultfp) {
1357 		*resultfp = fp;
1358 	}
1359 	if (resultfd) {
1360 		*resultfd = nfd;
1361 	}
1362 
1363 	return 0;
1364 }
1365 
1366 int
falloc(proc_t p,struct fileproc ** resultfp,int * resultfd,vfs_context_t ctx)1367 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
1368 {
1369 	return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
1370 }
1371 
1372 
1373 /*
1374  * fp_free
1375  *
1376  * Description:	Release the fd and free the fileproc associated with the fd
1377  *		in the per process open file table of the specified process;
1378  *		these values must correspond.
1379  *
1380  * Parameters:	p				Process containing fd
1381  *		fd				fd to be released
1382  *		fp				fileproc to be freed
1383  */
1384 void
fp_free(proc_t p,int fd,struct fileproc * fp)1385 fp_free(proc_t p, int fd, struct fileproc * fp)
1386 {
1387 	proc_fdlock_spin(p);
1388 	fdrelse(p, fd);
1389 	proc_fdunlock(p);
1390 
1391 	fg_free(fp->fp_glob);
1392 	os_ref_release_live(&fp->fp_iocount);
1393 	fileproc_free(fp);
1394 }
1395 
1396 
1397 struct fileproc *
fp_get_noref_locked(proc_t p,int fd)1398 fp_get_noref_locked(proc_t p, int fd)
1399 {
1400 	struct filedesc *fdp = &p->p_fd;
1401 	struct fileproc *fp;
1402 
1403 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1404 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1405 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1406 		return NULL;
1407 	}
1408 
1409 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1410 	return fp;
1411 }
1412 
1413 struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p,int fd)1414 fp_get_noref_locked_with_iocount(proc_t p, int fd)
1415 {
1416 	struct filedesc *fdp = &p->p_fd;
1417 	struct fileproc *fp = NULL;
1418 
1419 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1420 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1421 	    os_ref_get_count(&fp->fp_iocount) <= 1 ||
1422 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1423 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
1424 		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
1425 		    __func__, fd, fp);
1426 	}
1427 
1428 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1429 	return fp;
1430 }
1431 
1432 
1433 /*
1434  * fp_lookup
1435  *
1436  * Description:	Get fileproc pointer for a given fd from the per process
1437  *		open file table of the specified process and if successful,
1438  *		increment the fp_iocount
1439  *
1440  * Parameters:	p				Process in which fd lives
1441  *		fd				fd to get information for
1442  *		resultfp			Pointer to result fileproc
1443  *						pointer area, or 0 if none
1444  *		locked				!0 if the caller holds the
1445  *						proc_fdlock, 0 otherwise
1446  *
1447  * Returns:	0			Success
1448  *		EBADF			Bad file descriptor
1449  *
1450  * Implicit returns:
1451  *		*resultfp (modified)		Fileproc pointer
1452  *
 * Locks:	If the argument 'locked' is non-zero, then the caller is
 *		expected to have taken and held the proc_fdlock; if it is
 *		zero, then this routine internally takes and drops this lock.
1456  */
1457 int
fp_lookup(proc_t p,int fd,struct fileproc ** resultfp,int locked)1458 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1459 {
1460 	struct filedesc *fdp = &p->p_fd;
1461 	struct fileproc *fp;
1462 
1463 	if (!locked) {
1464 		proc_fdlock_spin(p);
1465 	}
1466 	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1467 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1468 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1469 		if (!locked) {
1470 			proc_fdunlock(p);
1471 		}
1472 		return EBADF;
1473 	}
1474 
1475 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1476 	os_ref_retain_locked(&fp->fp_iocount);
1477 
1478 	if (resultfp) {
1479 		*resultfp = fp;
1480 	}
1481 	if (!locked) {
1482 		proc_fdunlock(p);
1483 	}
1484 
1485 	return 0;
1486 }
1487 
1488 
1489 int
fp_get_ftype(proc_t p,int fd,file_type_t ftype,int err,struct fileproc ** fpp)1490 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1491 {
1492 	struct filedesc *fdp = &p->p_fd;
1493 	struct fileproc *fp;
1494 
1495 	proc_fdlock_spin(p);
1496 	if (fd < 0 || fd >= fdp->fd_nfiles ||
1497 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1498 	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1499 		proc_fdunlock(p);
1500 		return EBADF;
1501 	}
1502 
1503 	if (fp->f_type != ftype) {
1504 		proc_fdunlock(p);
1505 		return err;
1506 	}
1507 
1508 	zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1509 	os_ref_retain_locked(&fp->fp_iocount);
1510 	proc_fdunlock(p);
1511 
1512 	*fpp = fp;
1513 	return 0;
1514 }
1515 
1516 
1517 /*
1518  * fp_drop
1519  *
1520  * Description:	Drop the I/O reference previously taken by calling fp_lookup
1521  *		et. al.
1522  *
1523  * Parameters:	p				Process in which the fd lives
1524  *		fd				fd associated with the fileproc
1525  *		fp				fileproc on which to set the
1526  *						flag and drop the reference
1527  *		locked				flag to internally take and
1528  *						drop proc_fdlock if it is not
1529  *						already held by the caller
1530  *
1531  * Returns:	0				Success
1532  *		EBADF				Bad file descriptor
1533  *
 * Locks:	This function internally takes and drops the proc_fdlock for
 *		the supplied process if 'locked' is zero, and assumes that
 *		the caller already holds this lock if 'locked' is non-zero.
1537  *
1538  * Notes:	The fileproc must correspond to the fd in the supplied proc
1539  */
1540 int
fp_drop(proc_t p,int fd,struct fileproc * fp,int locked)1541 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
1542 {
1543 	struct filedesc *fdp = &p->p_fd;
1544 	int     needwakeup = 0;
1545 
1546 	if (!locked) {
1547 		proc_fdlock_spin(p);
1548 	}
1549 	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
1550 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
1551 	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1552 	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
1553 		if (!locked) {
1554 			proc_fdunlock(p);
1555 		}
1556 		return EBADF;
1557 	}
1558 
1559 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
1560 		if (fp->fp_flags & FP_SELCONFLICT) {
1561 			fp->fp_flags &= ~FP_SELCONFLICT;
1562 		}
1563 
1564 		if (fdp->fd_fpdrainwait) {
1565 			fdp->fd_fpdrainwait = 0;
1566 			needwakeup = 1;
1567 		}
1568 	}
1569 	if (!locked) {
1570 		proc_fdunlock(p);
1571 	}
1572 	if (needwakeup) {
1573 		wakeup(&fdp->fd_fpdrainwait);
1574 	}
1575 
1576 	return 0;
1577 }
1578 
1579 
1580 /*
1581  * fileproc_drain
1582  *
1583  * Description:	Drain out pending I/O operations
1584  *
1585  * Parameters:	p				Process closing this file
1586  *		fp				fileproc struct for the open
1587  *						instance on the file
1588  *
1589  * Returns:	void
1590  *
1591  * Locks:	Assumes the caller holds the proc_fdlock
1592  *
1593  * Notes:	For character devices, this occurs on the last close of the
1594  *		device; for all other file descriptors, this occurs on each
1595  *		close to prevent fd's from being closed out from under
1596  *		operations currently in progress and blocked
1597  *
1598  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
1599  *		regarding their use and interaction with this function.
1600  */
1601 static void
fileproc_drain(proc_t p,struct fileproc * fp)1602 fileproc_drain(proc_t p, struct fileproc * fp)
1603 {
1604 	struct filedesc *fdp = &p->p_fd;
1605 	struct vfs_context context;
1606 	thread_t thread;
1607 	bool is_current_proc;
1608 
1609 	is_current_proc = (p == current_proc());
1610 
1611 	if (!is_current_proc) {
1612 		proc_lock(p);
1613 		thread = proc_thread(p); /* XXX */
1614 		thread_reference(thread);
1615 		proc_unlock(p);
1616 	} else {
1617 		thread = current_thread();
1618 	}
1619 
1620 	context.vc_thread = thread;
1621 	context.vc_ucred = fp->fp_glob->fg_cred;
1622 
1623 	/* Set the vflag for drain */
1624 	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
1625 
1626 	while (os_ref_get_count(&fp->fp_iocount) > 1) {
1627 		lck_mtx_convert_spin(&fdp->fd_lock);
1628 
1629 		fo_drain(fp, &context);
1630 		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
1631 			struct select_set *selset;
1632 
1633 			if (fp->fp_guard_attrs) {
1634 				selset = fp->fp_guard->fpg_wset;
1635 			} else {
1636 				selset = fp->fp_wset;
1637 			}
1638 			if (waitq_wakeup64_all(selset, NO_EVENT64,
1639 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
1640 				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
1641 				    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
1642 			}
1643 		}
1644 		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1645 			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
1646 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
1647 				panic("bad select_conflict_queue");
1648 			}
1649 		}
1650 		fdp->fd_fpdrainwait = 1;
1651 		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
1652 	}
1653 #if DIAGNOSTIC
1654 	if ((fp->fp_flags & FP_INSELECT) != 0) {
1655 		panic("FP_INSELECT set on drained fp");
1656 	}
1657 #endif
1658 	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1659 		fp->fp_flags &= ~FP_SELCONFLICT;
1660 	}
1661 
1662 	if (!is_current_proc) {
1663 		thread_deallocate(thread);
1664 	}
1665 }
1666 
1667 
1668 int
fp_close_and_unlock(proc_t p,int fd,struct fileproc * fp,int flags)1669 fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
1670 {
1671 	struct filedesc *fdp = &p->p_fd;
1672 	struct fileglob *fg = fp->fp_glob;
1673 #if CONFIG_MACF
1674 	kauth_cred_t cred;
1675 #endif
1676 
1677 #if DIAGNOSTIC
1678 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1679 #endif
1680 
1681 	/*
1682 	 * Keep most people from finding the filedesc while we are closing it.
1683 	 *
1684 	 * Callers are:
1685 	 *
1686 	 * - dup2() which always waits for UF_RESERVED to clear
1687 	 *
1688 	 * - close/guarded_close/... who will fail the fileproc lookup if
1689 	 *   UF_RESERVED is set,
1690 	 *
1691 	 * - fdexec()/fdfree() who only run once all threads in the proc
1692 	 *   are properly canceled, hence no fileproc in this proc should
1693 	 *   be in flux.
1694 	 *
1695 	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
1696 	 *
1697 	 * Callers of fp_get_noref_locked_with_iocount() can still find
1698 	 * this entry so that they can drop their I/O reference despite
1699 	 * not having remembered the fileproc pointer (namely select() and
1700 	 * file_drop()).
1701 	 */
1702 	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
1703 		panic("%s: called with fileproc in flux (%d/:%p)",
1704 		    __func__, fd, fp);
1705 	}
1706 	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
1707 
1708 	if ((fp->fp_flags & FP_AIOISSUED) ||
1709 #if CONFIG_MACF
1710 	    (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
1711 #else
1712 	    kauth_authorize_fileop_has_listeners()
1713 #endif
1714 	    ) {
1715 		proc_fdunlock(p);
1716 
1717 		if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1718 			/*
1719 			 * call out to allow 3rd party notification of close.
1720 			 * Ignore result of kauth_authorize_fileop call.
1721 			 */
1722 #if CONFIG_MACF
1723 			cred = kauth_cred_proc_ref(p);
1724 			mac_file_notify_close(cred, fp->fp_glob);
1725 			kauth_cred_unref(&cred);
1726 #endif
1727 
1728 			if (kauth_authorize_fileop_has_listeners() &&
1729 			    vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
1730 				u_int   fileop_flags = 0;
1731 				if (fg->fg_flag & FWASWRITTEN) {
1732 					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
1733 				}
1734 				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
1735 				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);
1736 
1737 				vnode_put((vnode_t)fg_get_data(fg));
1738 			}
1739 		}
1740 
1741 		if (fp->fp_flags & FP_AIOISSUED) {
1742 			/*
1743 			 * cancel all async IO requests that can be cancelled.
1744 			 */
1745 			_aio_close( p, fd );
1746 		}
1747 
1748 		proc_fdlock(p);
1749 	}
1750 
1751 	if (fd < fdp->fd_knlistsize) {
1752 		knote_fdclose(p, fd);
1753 	}
1754 
1755 	fileproc_drain(p, fp);
1756 
1757 	if (flags & FD_DUP2RESV) {
1758 		fdp->fd_ofiles[fd] = NULL;
1759 		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
1760 	} else {
1761 		fdrelse(p, fd);
1762 	}
1763 
1764 	proc_fdunlock(p);
1765 
1766 	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
1767 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
1768 		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
1769 	}
1770 
1771 	fileproc_free(fp);
1772 
1773 	return fg_drop(p, fg);
1774 }
1775 
1776 /*
1777  * dupfdopen
1778  *
1779  * Description:	Duplicate the specified descriptor to a free descriptor;
1780  *		this is the second half of fdopen(), above.
1781  *
1782  * Parameters:	p				current process pointer
1783  *		indx				fd to dup to
1784  *		dfd				fd to dup from
1785  *		mode				mode to set on new fd
1786  *		error				command code
1787  *
1788  * Returns:	0				Success
1789  *		EBADF				Source fd is bad
1790  *		EACCES				Requested mode not allowed
1791  *		!0				'error', if not ENODEV or
1792  *						ENXIO
1793  *
1794  * Notes:	XXX This is not thread safe; see fdopen() above
1795  */
1796 int
dupfdopen(proc_t p,int indx,int dfd,int flags,int error)1797 dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
1798 {
1799 	struct filedesc *fdp = &p->p_fd;
1800 	struct fileproc *wfp;
1801 	struct fileproc *fp;
1802 #if CONFIG_MACF
1803 	int myerror;
1804 #endif
1805 
1806 	/*
1807 	 * If the to-be-dup'd fd number is greater than the allowed number
1808 	 * of file descriptors, or the fd to be dup'd has already been
1809 	 * closed, reject.  Note, check for new == old is necessary as
1810 	 * falloc could allocate an already closed to-be-dup'd descriptor
1811 	 * as the new descriptor.
1812 	 */
1813 	proc_fdlock(p);
1814 
1815 	fp = fdp->fd_ofiles[indx];
1816 	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
1817 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
1818 	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
1819 		proc_fdunlock(p);
1820 		return EBADF;
1821 	}
1822 #if CONFIG_MACF
1823 	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
1824 	if (myerror) {
1825 		proc_fdunlock(p);
1826 		return myerror;
1827 	}
1828 #endif
1829 	/*
1830 	 * There are two cases of interest here.
1831 	 *
1832 	 * For ENODEV simply dup (dfd) to file descriptor
1833 	 * (indx) and return.
1834 	 *
1835 	 * For ENXIO steal away the file structure from (dfd) and
1836 	 * store it in (indx).  (dfd) is effectively closed by
1837 	 * this operation.
1838 	 *
1839 	 * Any other error code is just returned.
1840 	 */
1841 	switch (error) {
1842 	case ENODEV:
1843 		if (fp_isguarded(wfp, GUARD_DUP)) {
1844 			proc_fdunlock(p);
1845 			return EPERM;
1846 		}
1847 
1848 		/*
1849 		 * Check that the mode the file is being opened for is a
1850 		 * subset of the mode of the existing descriptor.
1851 		 */
1852 		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1853 			proc_fdunlock(p);
1854 			return EACCES;
1855 		}
1856 		if (indx >= fdp->fd_afterlast) {
1857 			fdp->fd_afterlast = indx + 1;
1858 		}
1859 
1860 		if (fp->fp_glob) {
1861 			fg_free(fp->fp_glob);
1862 		}
1863 		fg_ref(p, wfp->fp_glob);
1864 		fp->fp_glob = wfp->fp_glob;
1865 		/*
1866 		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
1867 		 * unlike dup(), dup2() or fcntl(F_DUPFD).
1868 		 *
1869 		 * open1() already handled O_CLO{EXEC,FORK}
1870 		 */
1871 		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));
1872 
1873 		procfdtbl_releasefd(p, indx, NULL);
1874 		fp_drop(p, indx, fp, 1);
1875 		proc_fdunlock(p);
1876 		return 0;
1877 
1878 	default:
1879 		proc_fdunlock(p);
1880 		return error;
1881 	}
1882 	/* NOTREACHED */
1883 }
1884 
1885 
1886 #pragma mark KPIS (sys/file.h)
1887 
1888 /*
1889  * fg_get_vnode
1890  *
1891  * Description:	Return vnode associated with the file structure, if
1892  *		any.  The lifetime of the returned vnode is bound to
1893  *		the lifetime of the file structure.
1894  *
1895  * Parameters:	fg				Pointer to fileglob to
1896  *						inspect
1897  *
1898  * Returns:	vnode_t
1899  */
1900 vnode_t
fg_get_vnode(struct fileglob * fg)1901 fg_get_vnode(struct fileglob *fg)
1902 {
1903 	if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1904 		return (vnode_t)fg_get_data(fg);
1905 	} else {
1906 		return NULL;
1907 	}
1908 }
1909 
1910 
1911 /*
1912  * fp_getfvp
1913  *
1914  * Description:	Get fileproc and vnode pointer for a given fd from the per
1915  *		process open file table of the specified process, and if
1916  *		successful, increment the fp_iocount
1917  *
1918  * Parameters:	p				Process in which fd lives
1919  *		fd				fd to get information for
1920  *		resultfp			Pointer to result fileproc
1921  *						pointer area, or 0 if none
1922  *		resultvp			Pointer to result vnode pointer
1923  *						area, or 0 if none
1924  *
1925  * Returns:	0				Success
1926  *		EBADF				Bad file descriptor
1927  *		ENOTSUP				fd does not refer to a vnode
1928  *
1929  * Implicit returns:
1930  *		*resultfp (modified)		Fileproc pointer
1931  *		*resultvp (modified)		vnode pointer
1932  *
1933  * Notes:	The resultfp and resultvp fields are optional, and may be
1934  *		independently specified as NULL to skip returning information
1935  *
1936  * Locks:	Internally takes and releases proc_fdlock
1937  */
1938 int
fp_getfvp(proc_t p,int fd,struct fileproc ** resultfp,struct vnode ** resultvp)1939 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1940 {
1941 	struct fileproc *fp;
1942 	int error;
1943 
1944 	error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1945 	if (error == 0) {
1946 		if (resultfp) {
1947 			*resultfp = fp;
1948 		}
1949 		if (resultvp) {
1950 			*resultvp = (struct vnode *)fp_get_data(fp);
1951 		}
1952 	}
1953 
1954 	return error;
1955 }
1956 
1957 
1958 /*
1959  * fp_get_pipe_id
1960  *
1961  * Description:	Get pipe id for a given fd from the per process open file table
1962  *		of the specified process.
1963  *
1964  * Parameters:	p				Process in which fd lives
1965  *		fd				fd to get information for
1966  *		result_pipe_id			Pointer to result pipe id
1967  *
1968  * Returns:	0				Success
 *		EINVAL				NULL pointer arguments passed
1970  *		fp_lookup:EBADF			Bad file descriptor
1971  *		ENOTSUP				fd does not refer to a pipe
1972  *
1973  * Implicit returns:
1974  *		*result_pipe_id (modified)	pipe id
1975  *
1976  * Locks:	Internally takes and releases proc_fdlock
1977  */
1978 int
fp_get_pipe_id(proc_t p,int fd,uint64_t * result_pipe_id)1979 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1980 {
1981 	struct fileproc *fp = FILEPROC_NULL;
1982 	struct fileglob *fg = NULL;
1983 	int error = 0;
1984 
1985 	if (p == NULL || result_pipe_id == NULL) {
1986 		return EINVAL;
1987 	}
1988 
1989 	proc_fdlock(p);
1990 	if ((error = fp_lookup(p, fd, &fp, 1))) {
1991 		proc_fdunlock(p);
1992 		return error;
1993 	}
1994 	fg = fp->fp_glob;
1995 
1996 	if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
1997 		*result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
1998 	} else {
1999 		error = ENOTSUP;
2000 	}
2001 
2002 	fp_drop(p, fd, fp, 1);
2003 	proc_fdunlock(p);
2004 	return error;
2005 }
2006 
2007 
2008 /*
2009  * file_vnode
2010  *
2011  * Description:	Given an fd, look it up in the current process's per process
2012  *		open file table, and return its internal vnode pointer.
2013  *
2014  * Parameters:	fd				fd to obtain vnode from
2015  *		vpp				pointer to vnode return area
2016  *
2017  * Returns:	0				Success
2018  *		EINVAL				The fd does not refer to a
2019  *						vnode fileproc entry
2020  *	fp_lookup:EBADF				Bad file descriptor
2021  *
2022  * Implicit returns:
2023  *		*vpp (modified)			Returned vnode pointer
2024  *
2025  * Locks:	This function internally takes and drops the proc_fdlock for
2026  *		the current process
2027  *
2028  * Notes:	If successful, this function increments the fp_iocount on the
2029  *		fd's corresponding fileproc.
2030  *
2031  *		The fileproc referenced is not returned; because of this, care
2032  *		must be taken to not drop the last reference (e.g. by closing
2033  *		the file).  This is inherently unsafe, since the reference may
2034  *		not be recoverable from the vnode, if there is a subsequent
2035  *		close that destroys the associate fileproc.  The caller should
2036  *		therefore retain their own reference on the fileproc so that
2037  *		the fp_iocount can be dropped subsequently.  Failure to do this
2038  *		can result in the returned pointer immediately becoming invalid
2039  *		following the call.
2040  *
2041  *		Use of this function is discouraged.
2042  */
2043 int
file_vnode(int fd,struct vnode ** vpp)2044 file_vnode(int fd, struct vnode **vpp)
2045 {
2046 	return file_vnode_withvid(fd, vpp, NULL);
2047 }
2048 
2049 
2050 /*
2051  * file_vnode_withvid
2052  *
2053  * Description:	Given an fd, look it up in the current process's per process
2054  *		open file table, and return its internal vnode pointer.
2055  *
2056  * Parameters:	fd				fd to obtain vnode from
2057  *		vpp				pointer to vnode return area
2058  *		vidp				pointer to vid of the returned vnode
2059  *
2060  * Returns:	0				Success
2061  *		EINVAL				The fd does not refer to a
2062  *						vnode fileproc entry
2063  *	fp_lookup:EBADF				Bad file descriptor
2064  *
2065  * Implicit returns:
2066  *		*vpp (modified)			Returned vnode pointer
2067  *
2068  * Locks:	This function internally takes and drops the proc_fdlock for
2069  *		the current process
2070  *
2071  * Notes:	If successful, this function increments the fp_iocount on the
2072  *		fd's corresponding fileproc.
2073  *
2074  *		The fileproc referenced is not returned; because of this, care
2075  *		must be taken to not drop the last reference (e.g. by closing
2076  *		the file).  This is inherently unsafe, since the reference may
2077  *		not be recoverable from the vnode, if there is a subsequent
2078  *		close that destroys the associate fileproc.  The caller should
2079  *		therefore retain their own reference on the fileproc so that
2080  *		the fp_iocount can be dropped subsequently.  Failure to do this
2081  *		can result in the returned pointer immediately becoming invalid
2082  *		following the call.
2083  *
2084  *		Use of this function is discouraged.
2085  */
2086 int
file_vnode_withvid(int fd,struct vnode ** vpp,uint32_t * vidp)2087 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2088 {
2089 	struct fileproc *fp;
2090 	int error;
2091 
2092 	error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2093 	if (error == 0) {
2094 		if (vpp) {
2095 			*vpp = (struct vnode *)fp_get_data(fp);
2096 		}
2097 		if (vidp) {
2098 			*vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2099 		}
2100 	}
2101 	return error;
2102 }
2103 
2104 /*
2105  * file_socket
2106  *
2107  * Description:	Given an fd, look it up in the current process's per process
2108  *		open file table, and return its internal socket pointer.
2109  *
 * Parameters:	fd				fd to obtain socket from
2111  *		sp				pointer to socket return area
2112  *
2113  * Returns:	0				Success
2114  *		ENOTSOCK			Not a socket
2115  *		fp_lookup:EBADF			Bad file descriptor
2116  *
2117  * Implicit returns:
2118  *		*sp (modified)			Returned socket pointer
2119  *
2120  * Locks:	This function internally takes and drops the proc_fdlock for
2121  *		the current process
2122  *
2123  * Notes:	If successful, this function increments the fp_iocount on the
2124  *		fd's corresponding fileproc.
2125  *
2126  *		The fileproc referenced is not returned; because of this, care
2127  *		must be taken to not drop the last reference (e.g. by closing
2128  *		the file).  This is inherently unsafe, since the reference may
2129  *		not be recoverable from the socket, if there is a subsequent
2130  *		close that destroys the associate fileproc.  The caller should
2131  *		therefore retain their own reference on the fileproc so that
2132  *		the fp_iocount can be dropped subsequently.  Failure to do this
2133  *		can result in the returned pointer immediately becoming invalid
2134  *		following the call.
2135  *
2136  *		Use of this function is discouraged.
2137  */
2138 int
file_socket(int fd,struct socket ** sp)2139 file_socket(int fd, struct socket **sp)
2140 {
2141 	struct fileproc *fp;
2142 	int error;
2143 
2144 	error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2145 	if (error == 0) {
2146 		if (sp) {
2147 			*sp = (struct socket *)fp_get_data(fp);
2148 		}
2149 	}
2150 	return error;
2151 }
2152 
2153 
2154 /*
2155  * file_flags
2156  *
2157  * Description:	Given an fd, look it up in the current process's per process
2158  *		open file table, and return its fileproc's flags field.
2159  *
2160  * Parameters:	fd				fd whose flags are to be
2161  *						retrieved
2162  *		flags				pointer to flags data area
2163  *
2164  * Returns:	0				Success
2165  *		ENOTSOCK			Not a socket
2166  *		fp_lookup:EBADF			Bad file descriptor
2167  *
2168  * Implicit returns:
2169  *		*flags (modified)		Returned flags field
2170  *
2171  * Locks:	This function internally takes and drops the proc_fdlock for
2172  *		the current process
2173  */
2174 int
file_flags(int fd,int * flags)2175 file_flags(int fd, int *flags)
2176 {
2177 	proc_t p = current_proc();
2178 	struct fileproc *fp;
2179 	int error = EBADF;
2180 
2181 	proc_fdlock_spin(p);
2182 	fp = fp_get_noref_locked(p, fd);
2183 	if (fp) {
2184 		*flags = (int)fp->f_flag;
2185 		error = 0;
2186 	}
2187 	proc_fdunlock(p);
2188 
2189 	return error;
2190 }
2191 
2192 
2193 /*
2194  * file_drop
2195  *
2196  * Description:	Drop an iocount reference on an fd, and wake up any waiters
2197  *		for draining (i.e. blocked in fileproc_drain() called during
2198  *		the last attempt to close a file).
2199  *
2200  * Parameters:	fd				fd on which an ioreference is
2201  *						to be dropped
2202  *
2203  * Returns:	0				Success
2204  *
2205  * Description:	Given an fd, look it up in the current process's per process
2206  *		open file table, and drop it's fileproc's fp_iocount by one
2207  *
2208  * Notes:	This is intended as a corresponding operation to the functions
2209  *		file_vnode() and file_socket() operations.
2210  *
2211  *		If the caller can't possibly hold an I/O reference,
2212  *		this function will panic the kernel rather than allowing
2213  *		for memory corruption. Callers should always call this
2214  *		because they acquired an I/O reference on this file before.
2215  *
2216  *		Use of this function is discouraged.
2217  */
2218 int
file_drop(int fd)2219 file_drop(int fd)
2220 {
2221 	struct fileproc *fp;
2222 	proc_t p = current_proc();
2223 	struct filedesc *fdp = &p->p_fd;
2224 	int     needwakeup = 0;
2225 
2226 	proc_fdlock_spin(p);
2227 	fp = fp_get_noref_locked_with_iocount(p, fd);
2228 
2229 	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
2230 		if (fp->fp_flags & FP_SELCONFLICT) {
2231 			fp->fp_flags &= ~FP_SELCONFLICT;
2232 		}
2233 
2234 		if (fdp->fd_fpdrainwait) {
2235 			fdp->fd_fpdrainwait = 0;
2236 			needwakeup = 1;
2237 		}
2238 	}
2239 	proc_fdunlock(p);
2240 
2241 	if (needwakeup) {
2242 		wakeup(&fdp->fd_fpdrainwait);
2243 	}
2244 	return 0;
2245 }
2246 
2247 
2248 #pragma mark syscalls
2249 
/*
 * Fallback definitions for command selectors, used only when no earlier
 * header has already defined them.
 */
#if !defined(HFS_GET_BOOT_INFO)
#define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif /* !HFS_GET_BOOT_INFO */

#if !defined(HFS_SET_BOOT_INFO)
#define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif /* !HFS_SET_BOOT_INFO */

#if !defined(APFSIOC_REVERT_TO_SNAPSHOT)
#define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
#endif /* !APFSIOC_REVERT_TO_SNAPSHOT */
2261 
/*
 * CHECK_ADD_OVERFLOW_INT64L(x, y)
 *
 * Evaluates to 1 when the signed sum x + y would fall outside
 * [LLONG_MIN, LLONG_MAX], and to 0 otherwise.  The sum itself is never
 * computed.  Overflow is only possible when both operands have the same
 * sign, so those are the only two cases examined.
 *
 * Note: the arguments are evaluated more than once; pass expressions
 * without side effects.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	((((x) > 0) && ((y) > 0)) ? (((x) > LLONG_MAX - (y)) ? 1 : 0) : \
	 (((x) < 0) && ((y) < 0)) ? (((x) < LLONG_MIN - (y)) ? 1 : 0) : 0)
2266 
2267 /*
2268  * sys_getdtablesize
2269  *
2270  * Description:	Returns the per process maximum size of the descriptor table
2271  *
2272  * Parameters:	p				Process being queried
2273  *		retval				Pointer to the call return area
2274  *
2275  * Returns:	0				Success
2276  *
2277  * Implicit returns:
2278  *		*retval (modified)		Size of dtable
2279  */
2280 int
sys_getdtablesize(proc_t p,__unused struct getdtablesize_args * uap,int32_t * retval)2281 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2282 {
2283 	*retval = proc_limitgetcur_nofile(p);
2284 	return 0;
2285 }
2286 
2287 
/*
 * check_file_seek_range
 *
 * Description:	Verify that the byte range named by an flock request lies
 *		within 0..LLONG_MAX once l_start has been resolved against
 *		l_whence.
 *
 * Parameters:	fl			Lock request; l_whence, l_start and
 *					l_len are read, nothing is written
 *		cur_file_offset		Current file offset; only consulted
 *					for SEEK_CUR
 *
 * Returns:	0			Range is acceptable, or l_whence is
 *					neither SEEK_CUR nor SEEK_SET (no
 *					checking is done for those)
 *		EOVERFLOW		The start, or the last byte of a
 *					positive-length range, would exceed
 *					LLONG_MAX
 *		EINVAL			The start is negative, or start +
 *					l_len is negative for l_len <= 0
 */

static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	const off_t start = fl->l_start;
	const off_t len = fl->l_len;
	off_t base;

	switch (fl->l_whence) {
	case SEEK_CUR:
		/*
		 * start + offset can only overflow when both have the same
		 * sign; test against the limit without forming the sum.
		 */
		if (start > 0 && cur_file_offset > 0 &&
		    start > LLONG_MAX - cur_file_offset) {
			return EOVERFLOW;
		}
		if (start < 0 && cur_file_offset < 0 &&
		    start < LLONG_MIN - cur_file_offset) {
			return EINVAL;
		}
		base = start + cur_file_offset;
		break;

	case SEEK_SET:
		base = start;
		break;

	default:
		return 0;
	}

	/* The resolved start marker must not be negative. */
	if (base < 0) {
		return EINVAL;
	}

	if (len > 0) {
		/*
		 * Last byte is base + (len - 1).  Both terms are >= 0 here,
		 * so the subtraction from LLONG_MAX cannot wrap.
		 */
		if (base > LLONG_MAX - (len - 1)) {
			return EOVERFLOW;
		}
	} else if (base + len < 0) {
		/* base >= 0 and len <= 0, so this sum cannot overflow. */
		return EINVAL;
	}
	return 0;
}
2344 
2345 
2346 /*
2347  * sys_dup
2348  *
2349  * Description:	Duplicate a file descriptor.
2350  *
2351  * Parameters:	p				Process performing the dup
2352  *		uap->fd				The fd to dup
2353  *		retval				Pointer to the call return area
2354  *
2355  * Returns:	0				Success
2356  *		!0				Errno
2357  *
2358  * Implicit returns:
2359  *		*retval (modified)		The new descriptor
2360  */
2361 int
sys_dup(proc_t p,struct dup_args * uap,int32_t * retval)2362 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
2363 {
2364 	struct filedesc *fdp = &p->p_fd;
2365 	int old = uap->fd;
2366 	int new, error;
2367 	struct fileproc *fp;
2368 
2369 	proc_fdlock(p);
2370 	if ((error = fp_lookup(p, old, &fp, 1))) {
2371 		proc_fdunlock(p);
2372 		return error;
2373 	}
2374 	if (fp_isguarded(fp, GUARD_DUP)) {
2375 		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2376 		(void) fp_drop(p, old, fp, 1);
2377 		proc_fdunlock(p);
2378 		return error;
2379 	}
2380 	if ((error = fdalloc(p, 0, &new))) {
2381 		fp_drop(p, old, fp, 1);
2382 		proc_fdunlock(p);
2383 		return error;
2384 	}
2385 	error = finishdup(p, fdp, old, new, 0, retval);
2386 
2387 	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
2388 		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
2389 		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
2390 	}
2391 
2392 	fp_drop(p, old, fp, 1);
2393 	proc_fdunlock(p);
2394 
2395 	return error;
2396 }
2397 
2398 /*
2399  * sys_dup2
2400  *
2401  * Description:	Duplicate a file descriptor to a particular value.
2402  *
2403  * Parameters:	p				Process performing the dup
2404  *		uap->from			The fd to dup
2405  *		uap->to				The fd to dup it to
2406  *		retval				Pointer to the call return area
2407  *
2408  * Returns:	0				Success
2409  *		!0				Errno
2410  *
2411  * Implicit returns:
2412  *		*retval (modified)		The new descriptor
2413  */
2414 int
sys_dup2(proc_t p,struct dup2_args * uap,int32_t * retval)2415 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2416 {
2417 	return dup2(p, uap->from, uap->to, retval);
2418 }
2419 
2420 int
dup2(proc_t p,int old,int new,int * retval)2421 dup2(proc_t p, int old, int new, int *retval)
2422 {
2423 	struct filedesc *fdp = &p->p_fd;
2424 	struct fileproc *fp, *nfp;
2425 	int i, error;
2426 
2427 	proc_fdlock(p);
2428 
2429 startover:
2430 	if ((error = fp_lookup(p, old, &fp, 1))) {
2431 		proc_fdunlock(p);
2432 		return error;
2433 	}
2434 	if (fp_isguarded(fp, GUARD_DUP)) {
2435 		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2436 		(void) fp_drop(p, old, fp, 1);
2437 		proc_fdunlock(p);
2438 		return error;
2439 	}
2440 	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
2441 		fp_drop(p, old, fp, 1);
2442 		proc_fdunlock(p);
2443 		return EBADF;
2444 	}
2445 	if (old == new) {
2446 		fp_drop(p, old, fp, 1);
2447 		*retval = new;
2448 		proc_fdunlock(p);
2449 		return 0;
2450 	}
2451 	if (new < 0 || new >= fdp->fd_nfiles) {
2452 		if ((error = fdalloc(p, new, &i))) {
2453 			fp_drop(p, old, fp, 1);
2454 			proc_fdunlock(p);
2455 			return error;
2456 		}
2457 		if (new != i) {
2458 			fdrelse(p, i);
2459 			goto closeit;
2460 		}
2461 	} else {
2462 closeit:
2463 		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
2464 			fp_drop(p, old, fp, 1);
2465 			procfdtbl_waitfd(p, new);
2466 #if DIAGNOSTIC
2467 			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2468 #endif
2469 			goto startover;
2470 		}
2471 
2472 		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
2473 			if (fp_isguarded(nfp, GUARD_CLOSE)) {
2474 				fp_drop(p, old, fp, 1);
2475 				error = fp_guard_exception(p,
2476 				    new, nfp, kGUARD_EXC_CLOSE);
2477 				proc_fdunlock(p);
2478 				return error;
2479 			}
2480 			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
2481 			proc_fdlock(p);
2482 			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
2483 		} else {
2484 #if DIAGNOSTIC
2485 			if (fdp->fd_ofiles[new] != NULL) {
2486 				panic("dup2: no ref on fileproc %d", new);
2487 			}
2488 #endif
2489 			procfdtbl_reservefd(p, new);
2490 		}
2491 	}
2492 #if DIAGNOSTIC
2493 	if (fdp->fd_ofiles[new] != 0) {
2494 		panic("dup2: overwriting fd_ofiles with new %d", new);
2495 	}
2496 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
2497 		panic("dup2: unreserved fileflags with new %d", new);
2498 	}
2499 #endif
2500 	error = finishdup(p, fdp, old, new, 0, retval);
2501 	fp_drop(p, old, fp, 1);
2502 	proc_fdunlock(p);
2503 
2504 	return error;
2505 }
2506 
2507 
2508 /*
2509  * fcntl
2510  *
2511  * Description:	The file control system call.
2512  *
2513  * Parameters:	p				Process performing the fcntl
2514  *		uap->fd				The fd to operate against
2515  *		uap->cmd			The command to perform
2516  *		uap->arg			Pointer to the command argument
2517  *		retval				Pointer to the call return area
2518  *
2519  * Returns:	0				Success
2520  *		!0				Errno (see fcntl_nocancel)
2521  *
2522  * Implicit returns:
2523  *		*retval (modified)		fcntl return value (if any)
2524  *
2525  * Notes:	This system call differs from fcntl_nocancel() in that it
2526  *		tests for cancellation prior to performing a potentially
2527  *		blocking operation.
2528  */
2529 int
sys_fcntl(proc_t p,struct fcntl_args * uap,int32_t * retval)2530 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
2531 {
2532 	__pthread_testcancel(1);
2533 	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
2534 }
2535 
/* Entitlement name tested by sys_fcntl__OPENFROM(). */
#define ACCOUNT_OPENFROM_ENTITLEMENT "com.apple.private.vfs.role-account-openfrom"
2538 
2539 /*
2540  * sys_fcntl_nocancel
2541  *
2542  * Description:	A non-cancel-testing file control system call.
2543  *
2544  * Parameters:	p				Process performing the fcntl
2545  *		uap->fd				The fd to operate against
2546  *		uap->cmd			The command to perform
2547  *		uap->arg			Pointer to the command argument
2548  *		retval				Pointer to the call return area
2549  *
2550  * Returns:	0				Success
2551  *		EINVAL
2552  *	fp_lookup:EBADF				Bad file descriptor
2553  * [F_DUPFD]
2554  *	fdalloc:EMFILE
2555  *	fdalloc:ENOMEM
2556  *	finishdup:EBADF
2557  *	finishdup:ENOMEM
2558  * [F_SETOWN]
2559  *		ESRCH
2560  * [F_SETLK]
2561  *		EBADF
2562  *		EOVERFLOW
2563  *	copyin:EFAULT
2564  *	vnode_getwithref:???
2565  *	VNOP_ADVLOCK:???
2566  *	msleep:ETIMEDOUT
2567  * [F_GETLK]
2568  *		EBADF
2569  *		EOVERFLOW
2570  *	copyin:EFAULT
2571  *	copyout:EFAULT
2572  *	vnode_getwithref:???
2573  *	VNOP_ADVLOCK:???
2574  * [F_PREALLOCATE]
2575  *		EBADF
2576  *		EFBIG
2577  *		EINVAL
2578  *		ENOSPC
2579  *	copyin:EFAULT
2580  *	copyout:EFAULT
2581  *	vnode_getwithref:???
2582  *	VNOP_ALLOCATE:???
2583  * [F_SETSIZE,F_RDADVISE]
2584  *		EBADF
2585  *		EINVAL
2586  *	copyin:EFAULT
2587  *	vnode_getwithref:???
2588  * [F_RDAHEAD,F_NOCACHE]
2589  *		EBADF
2590  *	vnode_getwithref:???
2591  * [???]
2592  *
2593  * Implicit returns:
2594  *		*retval (modified)		fcntl return value (if any)
2595  */
/*
 * Declare a local `struct vfs_context' called <context>, initialized with
 * the current thread and with the f_cred of `fp'.  The macro refers to
 * `fp' by name, so a variable with that name must be in scope where it is
 * expanded.
 */
#define SYS_FCNTL_DECLARE_VFS_CONTEXT(context)          \
	struct vfs_context context = {                  \
	        .vc_thread = current_thread(),          \
	        .vc_ucred  = fp->f_cred,                \
	}
2601 
2602 static user_addr_t
sys_fnctl_parse_arg(proc_t p,user_long_t arg)2603 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2604 {
2605 	/*
2606 	 * Since the arg parameter is defined as a long but may be
2607 	 * either a long or a pointer we must take care to handle
2608 	 * sign extension issues.  Our sys call munger will sign
2609 	 * extend a long when we are called from a 32-bit process.
2610 	 * Since we can never have an address greater than 32-bits
2611 	 * from a 32-bit process we lop off the top 32-bits to avoid
2612 	 * getting the wrong address
2613 	 */
2614 	return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2615 }
2616 
2617 /* cleanup code common to fnctl functions, for when the fdlock is still held */
2618 static int
sys_fcntl_out(proc_t p,int fd,struct fileproc * fp,int error)2619 sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
2620 {
2621 	fp_drop(p, fd, fp, 1);
2622 	proc_fdunlock(p);
2623 	return error;
2624 }
2625 
2626 /* cleanup code common to fnctl acting on vnodes, once they unlocked the fdlock */
2627 static int
sys_fcntl_outdrop(proc_t p,int fd,struct fileproc * fp,struct vnode * vp,int error)2628 sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
2629 {
2630 #pragma unused(vp)
2631 
2632 	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2633 	fp_drop(p, fd, fp, 0);
2634 	return error;
2635 }
2636 
2637 typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
2638     struct fileproc *fp, int32_t *retval);
2639 
2640 typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
2641     user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2642 
2643 /*
2644  * SPI (private) for opening a file starting from a dir fd
2645  *
2646  * Note: do not inline to keep stack usage under control.
2647  */
2648 __attribute__((noinline))
2649 static int
sys_fcntl__OPENFROM(proc_t p,int fd,int cmd,user_long_t arg,struct fileproc * fp,struct vnode * vp,int32_t * retval)2650 sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
2651     struct fileproc *fp, struct vnode *vp, int32_t *retval)
2652 {
2653 #pragma unused(cmd)
2654 
2655 	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
2656 	struct user_fopenfrom fopen;
2657 	struct vnode_attr *va;
2658 	struct nameidata *nd;
2659 	int error, cmode;
2660 	bool has_entitlement;
2661 
2662 	/* Check if this isn't a valid file descriptor */
2663 	if ((fp->f_flag & FREAD) == 0) {
2664 		return sys_fcntl_out(p, fd, fp, EBADF);
2665 	}
2666 	proc_fdunlock(p);
2667 
2668 	if (vnode_getwithref(vp)) {
2669 		error = ENOENT;
2670 		goto outdrop;
2671 	}
2672 
2673 	/* Only valid for directories */
2674 	if (vp->v_type != VDIR) {
2675 		vnode_put(vp);
2676 		error = ENOTDIR;
2677 		goto outdrop;
2678 	}
2679 
2680 	/*
2681 	 * Only entitled apps may use the credentials of the thread
2682 	 * that opened the file descriptor.
2683 	 * Non-entitled threads will use their own context.
2684 	 */
2685 	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);
2686 
2687 	/* Get flags, mode and pathname arguments. */
2688 	if (IS_64BIT_PROCESS(p)) {
2689 		error = copyin(argp, &fopen, sizeof(fopen));
2690 	} else {
2691 		struct user32_fopenfrom fopen32;
2692 
2693 		error = copyin(argp, &fopen32, sizeof(fopen32));
2694 		fopen.o_flags = fopen32.o_flags;
2695 		fopen.o_mode = fopen32.o_mode;
2696 		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2697 	}
2698 	if (error) {
2699 		vnode_put(vp);
2700 		goto outdrop;
2701 	}
2702 
2703 	/* open1() can have really deep stacks, so allocate those */
2704 	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2705 	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2706 
2707 	AUDIT_ARG(fflags, fopen.o_flags);
2708 	AUDIT_ARG(mode, fopen.o_mode);
2709 	VATTR_INIT(va);
2710 	/* Mask off all but regular access permissions */
2711 	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
2712 	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);
2713 
2714 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2715 
2716 	/* Start the lookup relative to the file descriptor's vnode. */
2717 	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2718 	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2719 	nd->ni_dvp = vp;
2720 
2721 	error = open1(has_entitlement ? &context : vfs_context_current(),
2722 	    nd, fopen.o_flags, va, NULL, NULL, retval, AUTH_OPEN_NOAUTHFD);
2723 
2724 	kfree_type(struct vnode_attr, va);
2725 	kfree_type(struct nameidata, nd);
2726 
2727 	vnode_put(vp);
2728 
2729 outdrop:
2730 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
2731 }
2732 
2733 int
sys_fcntl_nocancel(proc_t p,struct fcntl_nocancel_args * uap,int32_t * retval)2734 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2735 {
2736 	int fd = uap->fd;
2737 	int cmd = uap->cmd;
2738 	struct filedesc *fdp = &p->p_fd;
2739 	struct fileproc *fp;
2740 	struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
2741 	unsigned int oflags, nflags;
2742 	int i, tmp, error, error2, flg = 0;
2743 	struct flock fl = {};
2744 	struct flocktimeout fltimeout;
2745 	struct timespec *timeout = NULL;
2746 	off_t offset;
2747 	int newmin;
2748 	daddr64_t lbn, bn;
2749 	unsigned int fflag;
2750 	user_addr_t argp;
2751 	boolean_t is64bit;
2752 	int has_entitlement = 0;
2753 
2754 	AUDIT_ARG(fd, uap->fd);
2755 	AUDIT_ARG(cmd, uap->cmd);
2756 
2757 	proc_fdlock(p);
2758 	if ((error = fp_lookup(p, fd, &fp, 1))) {
2759 		proc_fdunlock(p);
2760 		return error;
2761 	}
2762 
2763 	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2764 
2765 	is64bit = proc_is64bit(p);
2766 	if (is64bit) {
2767 		argp = uap->arg;
2768 	} else {
2769 		/*
2770 		 * Since the arg parameter is defined as a long but may be
2771 		 * either a long or a pointer we must take care to handle
2772 		 * sign extension issues.  Our sys call munger will sign
2773 		 * extend a long when we are called from a 32-bit process.
2774 		 * Since we can never have an address greater than 32-bits
2775 		 * from a 32-bit process we lop off the top 32-bits to avoid
2776 		 * getting the wrong address
2777 		 */
2778 		argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2779 	}
2780 
2781 #if CONFIG_MACF
2782 	error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2783 	if (error) {
2784 		goto out;
2785 	}
2786 #endif
2787 
2788 	switch (cmd) {
2789 	case F_DUPFD:
2790 	case F_DUPFD_CLOEXEC:
2791 		if (fp_isguarded(fp, GUARD_DUP)) {
2792 			error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2793 			goto out;
2794 		}
2795 		newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2796 		AUDIT_ARG(value32, newmin);
2797 		if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2798 			error = EINVAL;
2799 			goto out;
2800 		}
2801 		if ((error = fdalloc(p, newmin, &i))) {
2802 			goto out;
2803 		}
2804 		error = finishdup(p, fdp, fd, i,
2805 		    cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2806 		goto out;
2807 
2808 	case F_GETFD:
2809 		*retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2810 		error = 0;
2811 		goto out;
2812 
2813 	case F_SETFD:
2814 		AUDIT_ARG(value32, (uint32_t)uap->arg);
2815 		if (uap->arg & FD_CLOEXEC) {
2816 			fp->fp_flags |= FP_CLOEXEC;
2817 			error = 0;
2818 		} else if (!fp->fp_guard_attrs) {
2819 			fp->fp_flags &= ~FP_CLOEXEC;
2820 			error = 0;
2821 		} else {
2822 			error = fp_guard_exception(p,
2823 			    fd, fp, kGUARD_EXC_NOCLOEXEC);
2824 		}
2825 		goto out;
2826 
2827 	case F_GETFL:
2828 		fflag = fp->f_flag;
2829 		if ((fflag & O_EVTONLY) && proc_disallow_rw_for_o_evtonly(p)) {
2830 			/*
2831 			 * We insert back F_READ so that conversion back to open flags with
2832 			 * OFLAGS() will come out right. We only need to set 'FREAD' as the
2833 			 * 'O_RDONLY' is always implied.
2834 			 */
2835 			fflag |= FREAD;
2836 		}
2837 		*retval = OFLAGS(fflag);
2838 		error = 0;
2839 		goto out;
2840 
2841 	case F_SETFL:
2842 		// FIXME (rdar://54898652)
2843 		//
2844 		// this code is broken if fnctl(F_SETFL), ioctl() are
2845 		// called concurrently for the same fileglob.
2846 
2847 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2848 		AUDIT_ARG(value32, tmp);
2849 
2850 		os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2851 			nflags  = oflags & ~FCNTLFLAGS;
2852 			nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2853 		});
2854 		tmp = nflags & FNONBLOCK;
2855 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2856 		if (error) {
2857 			goto out;
2858 		}
2859 		tmp = nflags & FASYNC;
2860 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2861 		if (!error) {
2862 			goto out;
2863 		}
2864 		os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2865 		tmp = 0;
2866 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2867 		goto out;
2868 
2869 	case F_GETOWN:
2870 		if (fp->f_type == DTYPE_SOCKET) {
2871 			*retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2872 			error = 0;
2873 			goto out;
2874 		}
2875 		error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2876 		*retval = -*retval;
2877 		goto out;
2878 
2879 	case F_SETOWN:
2880 		tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2881 		AUDIT_ARG(value32, tmp);
2882 		if (fp->f_type == DTYPE_SOCKET) {
2883 			((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2884 			error = 0;
2885 			goto out;
2886 		}
2887 		if (fp->f_type == DTYPE_PIPE) {
2888 			error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2889 			goto out;
2890 		}
2891 
2892 		if (tmp <= 0) {
2893 			tmp = -tmp;
2894 		} else {
2895 			proc_t p1 = proc_find(tmp);
2896 			if (p1 == 0) {
2897 				error = ESRCH;
2898 				goto out;
2899 			}
2900 			tmp = (int)p1->p_pgrpid;
2901 			proc_rele(p1);
2902 		}
2903 		error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2904 		goto out;
2905 
2906 	case F_SETNOSIGPIPE:
2907 		tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2908 		if (fp->f_type == DTYPE_SOCKET) {
2909 #if SOCKETS
2910 			error = sock_setsockopt((struct socket *)fp_get_data(fp),
2911 			    SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2912 #else
2913 			error = EINVAL;
2914 #endif
2915 		} else {
2916 			struct fileglob *fg = fp->fp_glob;
2917 
2918 			lck_mtx_lock_spin(&fg->fg_lock);
2919 			if (tmp) {
2920 				fg->fg_lflags |= FG_NOSIGPIPE;
2921 			} else {
2922 				fg->fg_lflags &= ~FG_NOSIGPIPE;
2923 			}
2924 			lck_mtx_unlock(&fg->fg_lock);
2925 			error = 0;
2926 		}
2927 		goto out;
2928 
2929 	case F_GETNOSIGPIPE:
2930 		if (fp->f_type == DTYPE_SOCKET) {
2931 #if SOCKETS
2932 			int retsize = sizeof(*retval);
2933 			error = sock_getsockopt((struct socket *)fp_get_data(fp),
2934 			    SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2935 #else
2936 			error = EINVAL;
2937 #endif
2938 		} else {
2939 			*retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2940 			    1 : 0;
2941 			error = 0;
2942 		}
2943 		goto out;
2944 
2945 	case F_SETCONFINED:
2946 		/*
2947 		 * If this is the only reference to this fglob in the process
2948 		 * and it's already marked as close-on-fork then mark it as
2949 		 * (immutably) "confined" i.e. any fd that points to it will
2950 		 * forever be close-on-fork, and attempts to use an IPC
2951 		 * mechanism to move the descriptor elsewhere will fail.
2952 		 */
2953 		if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2954 			struct fileglob *fg = fp->fp_glob;
2955 
2956 			lck_mtx_lock_spin(&fg->fg_lock);
2957 			if (fg->fg_lflags & FG_CONFINED) {
2958 				error = 0;
2959 			} else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2960 				error = EAGAIN; /* go close the dup .. */
2961 			} else if (fp->fp_flags & FP_CLOFORK) {
2962 				fg->fg_lflags |= FG_CONFINED;
2963 				error = 0;
2964 			} else {
2965 				error = EBADF;  /* open without O_CLOFORK? */
2966 			}
2967 			lck_mtx_unlock(&fg->fg_lock);
2968 		} else {
2969 			/*
2970 			 * Other subsystems may have built on the immutability
2971 			 * of FG_CONFINED; clearing it may be tricky.
2972 			 */
2973 			error = EPERM;          /* immutable */
2974 		}
2975 		goto out;
2976 
2977 	case F_GETCONFINED:
2978 		*retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2979 		error = 0;
2980 		goto out;
2981 
2982 	case F_SETLKWTIMEOUT:
2983 	case F_SETLKW:
2984 	case F_OFD_SETLKWTIMEOUT:
2985 	case F_OFD_SETLKW:
2986 		flg |= F_WAIT;
2987 		OS_FALLTHROUGH;
2988 
2989 	case F_SETLK:
2990 	case F_OFD_SETLK:
2991 		if (fp->f_type != DTYPE_VNODE) {
2992 			error = EBADF;
2993 			goto out;
2994 		}
2995 		vp = (struct vnode *)fp_get_data(fp);
2996 
2997 		fflag = fp->f_flag;
2998 		offset = fp->f_offset;
2999 		proc_fdunlock(p);
3000 
3001 		/* Copy in the lock structure */
3002 		if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3003 			error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3004 			if (error) {
3005 				goto outdrop;
3006 			}
3007 			fl = fltimeout.fl;
3008 			timeout = &fltimeout.timeout;
3009 		} else {
3010 			error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3011 			if (error) {
3012 				goto outdrop;
3013 			}
3014 		}
3015 
3016 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3017 		/* and ending byte for EOVERFLOW in SEEK_SET */
3018 		error = check_file_seek_range(&fl, offset);
3019 		if (error) {
3020 			goto outdrop;
3021 		}
3022 
3023 		if ((error = vnode_getwithref(vp))) {
3024 			goto outdrop;
3025 		}
3026 		if (fl.l_whence == SEEK_CUR) {
3027 			fl.l_start += offset;
3028 		}
3029 
3030 #if CONFIG_MACF
3031 		error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3032 		    F_SETLK, &fl);
3033 		if (error) {
3034 			(void)vnode_put(vp);
3035 			goto outdrop;
3036 		}
3037 #endif
3038 
3039 #if CONFIG_FILE_LEASES
3040 		(void)vnode_breaklease(vp, O_WRONLY, vfs_context_current());
3041 #endif
3042 
3043 		switch (cmd) {
3044 		case F_OFD_SETLK:
3045 		case F_OFD_SETLKW:
3046 		case F_OFD_SETLKWTIMEOUT:
3047 			flg |= F_OFD_LOCK;
3048 			switch (fl.l_type) {
3049 			case F_RDLCK:
3050 				if ((fflag & FREAD) == 0) {
3051 					error = EBADF;
3052 					break;
3053 				}
3054 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3055 				    F_SETLK, &fl, flg, &context, timeout);
3056 				break;
3057 			case F_WRLCK:
3058 				if ((fflag & FWRITE) == 0) {
3059 					error = EBADF;
3060 					break;
3061 				}
3062 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3063 				    F_SETLK, &fl, flg, &context, timeout);
3064 				break;
3065 			case F_UNLCK:
3066 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3067 				    F_UNLCK, &fl, F_OFD_LOCK, &context,
3068 				    timeout);
3069 				break;
3070 			default:
3071 				error = EINVAL;
3072 				break;
3073 			}
3074 			if (0 == error &&
3075 			    (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3076 				struct fileglob *fg = fp->fp_glob;
3077 
3078 				/*
3079 				 * arrange F_UNLCK on last close (once
3080 				 * set, FG_HAS_OFDLOCK is immutable)
3081 				 */
3082 				if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3083 					lck_mtx_lock_spin(&fg->fg_lock);
3084 					fg->fg_lflags |= FG_HAS_OFDLOCK;
3085 					lck_mtx_unlock(&fg->fg_lock);
3086 				}
3087 			}
3088 			break;
3089 		default:
3090 			flg |= F_POSIX;
3091 			switch (fl.l_type) {
3092 			case F_RDLCK:
3093 				if ((fflag & FREAD) == 0) {
3094 					error = EBADF;
3095 					break;
3096 				}
3097 				// XXX UInt32 unsafe for LP64 kernel
3098 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3099 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3100 				    F_SETLK, &fl, flg, &context, timeout);
3101 				break;
3102 			case F_WRLCK:
3103 				if ((fflag & FWRITE) == 0) {
3104 					error = EBADF;
3105 					break;
3106 				}
3107 				// XXX UInt32 unsafe for LP64 kernel
3108 				os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3109 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3110 				    F_SETLK, &fl, flg, &context, timeout);
3111 				break;
3112 			case F_UNLCK:
3113 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3114 				    F_UNLCK, &fl, F_POSIX, &context, timeout);
3115 				break;
3116 			default:
3117 				error = EINVAL;
3118 				break;
3119 			}
3120 			break;
3121 		}
3122 		(void) vnode_put(vp);
3123 		goto outdrop;
3124 
3125 	case F_GETLK:
3126 	case F_OFD_GETLK:
3127 	case F_GETLKPID:
3128 	case F_OFD_GETLKPID:
3129 		if (fp->f_type != DTYPE_VNODE) {
3130 			error = EBADF;
3131 			goto out;
3132 		}
3133 		vp = (struct vnode *)fp_get_data(fp);
3134 
3135 		offset = fp->f_offset;
3136 		proc_fdunlock(p);
3137 
3138 		/* Copy in the lock structure */
3139 		error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3140 		if (error) {
3141 			goto outdrop;
3142 		}
3143 
3144 		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3145 		/* and ending byte for EOVERFLOW in SEEK_SET */
3146 		error = check_file_seek_range(&fl, offset);
3147 		if (error) {
3148 			goto outdrop;
3149 		}
3150 
3151 		if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3152 			error = EINVAL;
3153 			goto outdrop;
3154 		}
3155 
3156 		switch (fl.l_type) {
3157 		case F_RDLCK:
3158 		case F_UNLCK:
3159 		case F_WRLCK:
3160 			break;
3161 		default:
3162 			error = EINVAL;
3163 			goto outdrop;
3164 		}
3165 
3166 		switch (fl.l_whence) {
3167 		case SEEK_CUR:
3168 		case SEEK_SET:
3169 		case SEEK_END:
3170 			break;
3171 		default:
3172 			error = EINVAL;
3173 			goto outdrop;
3174 		}
3175 
3176 		if ((error = vnode_getwithref(vp)) == 0) {
3177 			if (fl.l_whence == SEEK_CUR) {
3178 				fl.l_start += offset;
3179 			}
3180 
3181 #if CONFIG_MACF
3182 			error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3183 			    cmd, &fl);
3184 			if (error == 0)
3185 #endif
3186 			switch (cmd) {
3187 			case F_OFD_GETLK:
3188 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3189 				    F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3190 				break;
3191 			case F_OFD_GETLKPID:
3192 				error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3193 				    F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3194 				break;
3195 			default:
3196 				error = VNOP_ADVLOCK(vp, (caddr_t)p,
3197 				    cmd, &fl, F_POSIX, &context, NULL);
3198 				break;
3199 			}
3200 
3201 			(void)vnode_put(vp);
3202 
3203 			if (error == 0) {
3204 				error = copyout((caddr_t)&fl, argp, sizeof(fl));
3205 			}
3206 		}
3207 		goto outdrop;
3208 
3209 	case F_PREALLOCATE: {
3210 		fstore_t alloc_struct;    /* structure for allocate command */
3211 		u_int32_t alloc_flags = 0;
3212 
3213 		if (fp->f_type != DTYPE_VNODE) {
3214 			error = EBADF;
3215 			goto out;
3216 		}
3217 
3218 		vp = (struct vnode *)fp_get_data(fp);
3219 		proc_fdunlock(p);
3220 
3221 		/* make sure that we have write permission */
3222 		if ((fp->f_flag & FWRITE) == 0) {
3223 			error = EBADF;
3224 			goto outdrop;
3225 		}
3226 
3227 		error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3228 		if (error) {
3229 			goto outdrop;
3230 		}
3231 
3232 		/* now set the space allocated to 0 */
3233 		alloc_struct.fst_bytesalloc = 0;
3234 
3235 		/*
3236 		 * Do some simple parameter checking
3237 		 */
3238 
3239 		/* set up the flags */
3240 
3241 		alloc_flags |= PREALLOCATE;
3242 
3243 		if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3244 			alloc_flags |= ALLOCATECONTIG;
3245 		}
3246 
3247 		if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3248 			alloc_flags |= ALLOCATEALL;
3249 		}
3250 
3251 		if (alloc_struct.fst_flags & F_ALLOCATEPERSIST) {
3252 			alloc_flags |= ALLOCATEPERSIST;
3253 		}
3254 
3255 		/*
3256 		 * Do any position mode specific stuff.  The position
3257 		 * modes supported now are F_PEOFPOSMODE and F_VOLPOSMODE
3258 		 */
3259 
3260 		switch (alloc_struct.fst_posmode) {
3261 		case F_PEOFPOSMODE:
3262 			if (alloc_struct.fst_offset != 0) {
3263 				error = EINVAL;
3264 				goto outdrop;
3265 			}
3266 
3267 			alloc_flags |= ALLOCATEFROMPEOF;
3268 			break;
3269 
3270 		case F_VOLPOSMODE:
3271 			if (alloc_struct.fst_offset <= 0) {
3272 				error = EINVAL;
3273 				goto outdrop;
3274 			}
3275 
3276 			alloc_flags |= ALLOCATEFROMVOL;
3277 			break;
3278 
3279 		default: {
3280 			error = EINVAL;
3281 			goto outdrop;
3282 		}
3283 		}
3284 		if ((error = vnode_getwithref(vp)) == 0) {
3285 			/*
3286 			 * call allocate to get the space
3287 			 */
3288 			error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3289 			    &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3290 			    &context);
3291 			(void)vnode_put(vp);
3292 
3293 			error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3294 
3295 			if (error == 0) {
3296 				error = error2;
3297 			}
3298 		}
3299 		goto outdrop;
3300 	}
3301 	case F_PUNCHHOLE: {
3302 		fpunchhole_t args;
3303 
3304 		if (fp->f_type != DTYPE_VNODE) {
3305 			error = EBADF;
3306 			goto out;
3307 		}
3308 
3309 		vp = (struct vnode *)fp_get_data(fp);
3310 		proc_fdunlock(p);
3311 
3312 		/* need write permissions */
3313 		if ((fp->f_flag & FWRITE) == 0) {
3314 			error = EPERM;
3315 			goto outdrop;
3316 		}
3317 
3318 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3319 			goto outdrop;
3320 		}
3321 
3322 		if ((error = vnode_getwithref(vp))) {
3323 			goto outdrop;
3324 		}
3325 
3326 #if CONFIG_MACF
3327 		if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
3328 			(void)vnode_put(vp);
3329 			goto outdrop;
3330 		}
3331 #endif
3332 
3333 		error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
3334 		(void)vnode_put(vp);
3335 
3336 		goto outdrop;
3337 	}
3338 	case F_TRIM_ACTIVE_FILE: {
3339 		ftrimactivefile_t args;
3340 
3341 		if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
3342 			error = EACCES;
3343 			goto out;
3344 		}
3345 
3346 		if (fp->f_type != DTYPE_VNODE) {
3347 			error = EBADF;
3348 			goto out;
3349 		}
3350 
3351 		vp = (struct vnode *)fp_get_data(fp);
3352 		proc_fdunlock(p);
3353 
3354 		/* need write permissions */
3355 		if ((fp->f_flag & FWRITE) == 0) {
3356 			error = EPERM;
3357 			goto outdrop;
3358 		}
3359 
3360 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3361 			goto outdrop;
3362 		}
3363 
3364 		if ((error = vnode_getwithref(vp))) {
3365 			goto outdrop;
3366 		}
3367 
3368 		error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
3369 		(void)vnode_put(vp);
3370 
3371 		goto outdrop;
3372 	}
3373 	case F_SPECULATIVE_READ: {
3374 		fspecread_t args;
3375 		off_t temp_length = 0;
3376 
3377 		if (fp->f_type != DTYPE_VNODE) {
3378 			error = EBADF;
3379 			goto out;
3380 		}
3381 
3382 		vp = (struct vnode *)fp_get_data(fp);
3383 		proc_fdunlock(p);
3384 
3385 		if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3386 			goto outdrop;
3387 		}
3388 
3389 		/* Discard invalid offsets or lengths */
3390 		if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3391 			error = EINVAL;
3392 			goto outdrop;
3393 		}
3394 
3395 		/*
3396 		 * Round the file offset down to a page-size boundary (or to 0).
3397 		 * The filesystem will need to round the length up to the end of the page boundary
3398 		 * or to the EOF of the file.
3399 		 */
3400 		uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3401 		uint64_t foff_delta = args.fsr_offset - foff;
3402 		args.fsr_offset = (off_t) foff;
3403 
3404 		/*
3405 		 * Now add in the delta to the supplied length. Since we may have adjusted the
3406 		 * offset, increase it by the amount that we adjusted.
3407 		 */
3408 		if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3409 			error = EOVERFLOW;
3410 			goto outdrop;
3411 		}
3412 
3413 		/*
3414 		 * Make sure (fsr_offset + fsr_length) does not overflow.
3415 		 */
3416 		if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3417 			error = EOVERFLOW;
3418 			goto outdrop;
3419 		}
3420 
3421 		if ((error = vnode_getwithref(vp))) {
3422 			goto outdrop;
3423 		}
3424 		error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
3425 		(void)vnode_put(vp);
3426 
3427 		goto outdrop;
3428 	}
3429 	case F_SETSIZE:
3430 		if (fp->f_type != DTYPE_VNODE) {
3431 			error = EBADF;
3432 			goto out;
3433 		}
3434 		vp = (struct vnode *)fp_get_data(fp);
3435 		proc_fdunlock(p);
3436 
3437 		error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
3438 		if (error) {
3439 			goto outdrop;
3440 		}
3441 		AUDIT_ARG(value64, offset);
3442 
3443 		error = vnode_getwithref(vp);
3444 		if (error) {
3445 			goto outdrop;
3446 		}
3447 
3448 #if CONFIG_MACF
3449 		error = mac_vnode_check_truncate(&context,
3450 		    fp->fp_glob->fg_cred, vp);
3451 		if (error) {
3452 			(void)vnode_put(vp);
3453 			goto outdrop;
3454 		}
3455 #endif
3456 		/*
3457 		 * Make sure that we are root.  Growing a file
3458 		 * without zero filling the data is a security hole.
3459 		 */
3460 		if (!kauth_cred_issuser(kauth_cred_get())) {
3461 			error = EACCES;
3462 		} else {
3463 			/*
3464 			 * Require privilege to change file size without zerofill,
3465 			 * else will change the file size and zerofill it.
3466 			 */
3467 			error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
3468 			if (error == 0) {
3469 				error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
3470 			} else {
3471 				error = vnode_setsize(vp, offset, 0, &context);
3472 			}
3473 
3474 #if CONFIG_MACF
3475 			if (error == 0) {
3476 				mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
3477 			}
3478 #endif
3479 		}
3480 
3481 		(void)vnode_put(vp);
3482 		goto outdrop;
3483 
3484 	case F_RDAHEAD:
3485 		if (fp->f_type != DTYPE_VNODE) {
3486 			error = EBADF;
3487 			goto out;
3488 		}
3489 		if (uap->arg) {
3490 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3491 		} else {
3492 			os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3493 		}
3494 		goto out;
3495 
3496 	case F_NOCACHE:
3497 		if (fp->f_type != DTYPE_VNODE) {
3498 			error = EBADF;
3499 			goto out;
3500 		}
3501 		if (uap->arg) {
3502 			os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3503 		} else {
3504 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3505 		}
3506 		goto out;
3507 
3508 	case F_NODIRECT:
3509 		if (fp->f_type != DTYPE_VNODE) {
3510 			error = EBADF;
3511 			goto out;
3512 		}
3513 		if (uap->arg) {
3514 			os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3515 		} else {
3516 			os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3517 		}
3518 		goto out;
3519 
3520 	case F_SINGLE_WRITER:
3521 		if (fp->f_type != DTYPE_VNODE) {
3522 			error = EBADF;
3523 			goto out;
3524 		}
3525 		if (uap->arg) {
3526 			os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3527 		} else {
3528 			os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3529 		}
3530 		goto out;
3531 
3532 	case F_GLOBAL_NOCACHE:
3533 		if (fp->f_type != DTYPE_VNODE) {
3534 			error = EBADF;
3535 			goto out;
3536 		}
3537 		vp = (struct vnode *)fp_get_data(fp);
3538 		proc_fdunlock(p);
3539 
3540 		if ((error = vnode_getwithref(vp)) == 0) {
3541 			*retval = vnode_isnocache(vp);
3542 
3543 			if (uap->arg) {
3544 				vnode_setnocache(vp);
3545 			} else {
3546 				vnode_clearnocache(vp);
3547 			}
3548 
3549 			(void)vnode_put(vp);
3550 		}
3551 		goto outdrop;
3552 
3553 	case F_CHECK_OPENEVT:
3554 		if (fp->f_type != DTYPE_VNODE) {
3555 			error = EBADF;
3556 			goto out;
3557 		}
3558 		vp = (struct vnode *)fp_get_data(fp);
3559 		proc_fdunlock(p);
3560 
3561 		if ((error = vnode_getwithref(vp)) == 0) {
3562 			*retval = vnode_is_openevt(vp);
3563 
3564 			if (uap->arg) {
3565 				vnode_set_openevt(vp);
3566 			} else {
3567 				vnode_clear_openevt(vp);
3568 			}
3569 
3570 			(void)vnode_put(vp);
3571 		}
3572 		goto outdrop;
3573 
3574 	case F_RDADVISE: {
3575 		struct radvisory ra_struct;
3576 
3577 		if (fp->f_type != DTYPE_VNODE) {
3578 			error = EBADF;
3579 			goto out;
3580 		}
3581 		vp = (struct vnode *)fp_get_data(fp);
3582 		proc_fdunlock(p);
3583 
3584 		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
3585 			goto outdrop;
3586 		}
3587 		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
3588 			error = EINVAL;
3589 			goto outdrop;
3590 		}
3591 		if ((error = vnode_getwithref(vp)) == 0) {
3592 			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
3593 
3594 			(void)vnode_put(vp);
3595 		}
3596 		goto outdrop;
3597 	}
3598 
3599 	case F_FLUSH_DATA:
3600 
3601 		if (fp->f_type != DTYPE_VNODE) {
3602 			error = EBADF;
3603 			goto out;
3604 		}
3605 		vp = (struct vnode *)fp_get_data(fp);
3606 		proc_fdunlock(p);
3607 
3608 		if ((error = vnode_getwithref(vp)) == 0) {
3609 			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
3610 
3611 			(void)vnode_put(vp);
3612 		}
3613 		goto outdrop;
3614 
3615 	case F_LOG2PHYS:
3616 	case F_LOG2PHYS_EXT: {
3617 		struct log2phys l2p_struct = {};    /* structure for allocate command */
3618 		int devBlockSize;
3619 
3620 		off_t file_offset = 0;
3621 		size_t a_size = 0;
3622 		size_t run = 0;
3623 
3624 		if (cmd == F_LOG2PHYS_EXT) {
3625 			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
3626 			if (error) {
3627 				goto out;
3628 			}
3629 			file_offset = l2p_struct.l2p_devoffset;
3630 		} else {
3631 			file_offset = fp->f_offset;
3632 		}
3633 		if (fp->f_type != DTYPE_VNODE) {
3634 			error = EBADF;
3635 			goto out;
3636 		}
3637 		vp = (struct vnode *)fp_get_data(fp);
3638 		proc_fdunlock(p);
3639 		if ((error = vnode_getwithref(vp))) {
3640 			goto outdrop;
3641 		}
3642 		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
3643 		if (error) {
3644 			(void)vnode_put(vp);
3645 			goto outdrop;
3646 		}
3647 		error = VNOP_BLKTOOFF(vp, lbn, &offset);
3648 		if (error) {
3649 			(void)vnode_put(vp);
3650 			goto outdrop;
3651 		}
3652 		devBlockSize = vfs_devblocksize(vnode_mount(vp));
3653 		if (cmd == F_LOG2PHYS_EXT) {
3654 			if (l2p_struct.l2p_contigbytes < 0) {
3655 				vnode_put(vp);
3656 				error = EINVAL;
3657 				goto outdrop;
3658 			}
3659 
3660 			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
3661 		} else {
3662 			a_size = devBlockSize;
3663 		}
3664 
3665 		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
3666 
3667 		(void)vnode_put(vp);
3668 
3669 		if (!error) {
3670 			l2p_struct.l2p_flags = 0;       /* for now */
3671 			if (cmd == F_LOG2PHYS_EXT) {
3672 				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
3673 			} else {
3674 				l2p_struct.l2p_contigbytes = 0; /* for now */
3675 			}
3676 
3677 			/*
3678 			 * The block number being -1 suggests that the file offset is not backed
3679 			 * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
3680 			 */
3681 			if (bn == -1) {
3682 				/* Don't multiply it by the block size */
3683 				l2p_struct.l2p_devoffset = bn;
3684 			} else {
3685 				l2p_struct.l2p_devoffset = bn * devBlockSize;
3686 				l2p_struct.l2p_devoffset += file_offset - offset;
3687 			}
3688 			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
3689 		}
3690 		goto outdrop;
3691 	}
3692 	case F_GETPATH:
3693 	case F_GETPATH_NOFIRMLINK: {
3694 		char *pathbufp;
3695 		size_t pathlen;
3696 
3697 		if (fp->f_type != DTYPE_VNODE) {
3698 			error = EBADF;
3699 			goto out;
3700 		}
3701 		vp = (struct vnode *)fp_get_data(fp);
3702 		proc_fdunlock(p);
3703 
3704 		pathlen = MAXPATHLEN;
3705 		pathbufp = zalloc(ZV_NAMEI);
3706 
3707 		if ((error = vnode_getwithref(vp)) == 0) {
3708 			error = vn_getpath_ext(vp, NULL, pathbufp,
3709 			    &pathlen, cmd == F_GETPATH_NOFIRMLINK ?
3710 			    VN_GETPATH_NO_FIRMLINK : 0);
3711 			(void)vnode_put(vp);
3712 
3713 			if (error == 0) {
3714 				error = copyout((caddr_t)pathbufp, argp, pathlen);
3715 			}
3716 		}
3717 		zfree(ZV_NAMEI, pathbufp);
3718 		goto outdrop;
3719 	}
3720 
3721 	case F_PATHPKG_CHECK: {
3722 		char *pathbufp;
3723 		size_t pathlen;
3724 
3725 		if (fp->f_type != DTYPE_VNODE) {
3726 			error = EBADF;
3727 			goto out;
3728 		}
3729 		vp = (struct vnode *)fp_get_data(fp);
3730 		proc_fdunlock(p);
3731 
3732 		pathlen = MAXPATHLEN;
3733 		pathbufp = zalloc(ZV_NAMEI);
3734 
3735 		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
3736 			if ((error = vnode_getwithref(vp)) == 0) {
3737 				AUDIT_ARG(text, pathbufp);
3738 				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
3739 
3740 				(void)vnode_put(vp);
3741 			}
3742 		}
3743 		zfree(ZV_NAMEI, pathbufp);
3744 		goto outdrop;
3745 	}
3746 
3747 	case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
3748 	case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
3749 	case F_BARRIERFSYNC:  // fsync + barrier
3750 	case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
3751 	case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
3752 		if (fp->f_type != DTYPE_VNODE) {
3753 			error = EBADF;
3754 			goto out;
3755 		}
3756 		vp = (struct vnode *)fp_get_data(fp);
3757 		proc_fdunlock(p);
3758 
3759 		if ((error = vnode_getwithref(vp)) == 0) {
3760 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
3761 
3762 			(void)vnode_put(vp);
3763 		}
3764 		break;
3765 	}
3766 
3767 	/*
3768 	 * SPI (private) for opening a file starting from a dir fd
3769 	 */
3770 	case F_OPENFROM: {
3771 		/* Check if this isn't a valid file descriptor */
3772 		if (fp->f_type != DTYPE_VNODE) {
3773 			error = EBADF;
3774 			goto out;
3775 		}
3776 		vp = (struct vnode *)fp_get_data(fp);
3777 
3778 		return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
3779 	}
3780 
3781 	/*
3782 	 * SPI (private) for unlinking a file starting from a dir fd
3783 	 */
3784 	case F_UNLINKFROM: {
3785 		user_addr_t pathname;
3786 
3787 		/* Check if this isn't a valid file descriptor */
3788 		if ((fp->f_type != DTYPE_VNODE) ||
3789 		    (fp->f_flag & FREAD) == 0) {
3790 			error = EBADF;
3791 			goto out;
3792 		}
3793 		vp = (struct vnode *)fp_get_data(fp);
3794 		proc_fdunlock(p);
3795 
3796 		if (vnode_getwithref(vp)) {
3797 			error = ENOENT;
3798 			goto outdrop;
3799 		}
3800 
3801 		/* Only valid for directories */
3802 		if (vp->v_type != VDIR) {
3803 			vnode_put(vp);
3804 			error = ENOTDIR;
3805 			goto outdrop;
3806 		}
3807 
3808 		/*
3809 		 * Only entitled apps may use the credentials of the thread
3810 		 * that opened the file descriptor.
3811 		 * Non-entitled threads will use their own context.
3812 		 */
3813 		if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
3814 			has_entitlement = 1;
3815 		}
3816 
3817 		/* Get the pathname argument. */
3818 		if (IS_64BIT_PROCESS(p)) {
3819 			pathname = (user_addr_t)argp;
3820 		} else {
3821 			pathname = CAST_USER_ADDR_T(argp);
3822 		}
3823 
3824 		/* Start the lookup relative to the file descriptor's vnode. */
3825 		error = unlink1(has_entitlement ? &context : vfs_context_current(),
3826 		    vp, pathname, UIO_USERSPACE, 0);
3827 
3828 		vnode_put(vp);
3829 		break;
3830 	}
3831 
3832 	case F_ADDSIGS:
3833 	case F_ADDFILESIGS:
3834 	case F_ADDFILESIGS_FOR_DYLD_SIM:
3835 	case F_ADDFILESIGS_RETURN:
3836 	case F_ADDFILESIGS_INFO:
3837 	{
3838 		struct cs_blob *blob = NULL;
3839 		struct user_fsignatures fs;
3840 		kern_return_t kr;
3841 		vm_offset_t kernel_blob_addr;
3842 		vm_size_t kernel_blob_size;
3843 		int blob_add_flags = 0;
3844 		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
3845 		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
3846 		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
3847 
3848 		if (fp->f_type != DTYPE_VNODE) {
3849 			error = EBADF;
3850 			goto out;
3851 		}
3852 		vp = (struct vnode *)fp_get_data(fp);
3853 		proc_fdunlock(p);
3854 
3855 		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3856 			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
3857 			if ((proc_getcsflags(p) & CS_KILL) == 0) {
3858 				proc_lock(p);
3859 				proc_csflags_set(p, CS_KILL);
3860 				proc_unlock(p);
3861 			}
3862 		}
3863 
3864 		error = vnode_getwithref(vp);
3865 		if (error) {
3866 			goto outdrop;
3867 		}
3868 
3869 		if (IS_64BIT_PROCESS(p)) {
3870 			error = copyin(argp, &fs, sizeof_fs);
3871 		} else {
3872 			if (cmd == F_ADDFILESIGS_INFO) {
3873 				error = EINVAL;
3874 				vnode_put(vp);
3875 				goto outdrop;
3876 			}
3877 
3878 			struct user32_fsignatures fs32;
3879 
3880 			error = copyin(argp, &fs32, sizeof(fs32));
3881 			fs.fs_file_start = fs32.fs_file_start;
3882 			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
3883 			fs.fs_blob_size = fs32.fs_blob_size;
3884 		}
3885 
3886 		if (error) {
3887 			vnode_put(vp);
3888 			goto outdrop;
3889 		}
3890 
3891 		/*
3892 		 * First check if we have something loaded at this offset
3893 		 */
3894 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
3895 		if (blob != NULL) {
3896 			/* If this is for dyld_sim revalidate the blob */
3897 			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
3898 				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
3899 				if (error) {
3900 					blob = NULL;
3901 					if (error != EAGAIN) {
3902 						vnode_put(vp);
3903 						goto outdrop;
3904 					}
3905 				}
3906 			}
3907 		}
3908 
3909 		if (blob == NULL) {
3910 			/*
3911 			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
3912 			 * our use cases for the immediate future, but note that at the time of this commit, some
3913 			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
3914 			 *
3915 			 * We should consider how we can manage this more effectively; the above means that some
3916 			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
3917 			 * threshold considered ridiculous at the time of this change.
3918 			 */
3919 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
3920 			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
3921 				error = E2BIG;
3922 				vnode_put(vp);
3923 				goto outdrop;
3924 			}
3925 
3926 			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
3927 			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
3928 			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
3929 				error = ENOMEM;
3930 				vnode_put(vp);
3931 				goto outdrop;
3932 			}
3933 
3934 			if (cmd == F_ADDSIGS) {
3935 				error = copyin(fs.fs_blob_start,
3936 				    (void *) kernel_blob_addr,
3937 				    fs.fs_blob_size);
3938 			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
3939 				int resid;
3940 
3941 				error = vn_rdwr(UIO_READ,
3942 				    vp,
3943 				    (caddr_t) kernel_blob_addr,
3944 				    (int)kernel_blob_size,
3945 				    fs.fs_file_start + fs.fs_blob_start,
3946 				    UIO_SYSSPACE,
3947 				    0,
3948 				    kauth_cred_get(),
3949 				    &resid,
3950 				    p);
3951 				if ((error == 0) && resid) {
3952 					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
3953 					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
3954 				}
3955 			}
3956 
3957 			if (error) {
3958 				ubc_cs_blob_deallocate(kernel_blob_addr,
3959 				    kernel_blob_size);
3960 				vnode_put(vp);
3961 				goto outdrop;
3962 			}
3963 
3964 			blob = NULL;
3965 			error = ubc_cs_blob_add(vp,
3966 			    proc_platform(p),
3967 			    CPU_TYPE_ANY,                       /* not for a specific architecture */
3968 			    CPU_SUBTYPE_ANY,
3969 			    fs.fs_file_start,
3970 			    &kernel_blob_addr,
3971 			    kernel_blob_size,
3972 			    NULL,
3973 			    blob_add_flags,
3974 			    &blob);
3975 
3976 			/* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
3977 			if (error) {
3978 				if (kernel_blob_addr) {
3979 					ubc_cs_blob_deallocate(kernel_blob_addr,
3980 					    kernel_blob_size);
3981 				}
3982 				vnode_put(vp);
3983 				goto outdrop;
3984 			} else {
3985 #if CHECK_CS_VALIDATION_BITMAP
3986 				ubc_cs_validation_bitmap_allocate( vp );
3987 #endif
3988 			}
3989 		}
3990 
3991 		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
3992 		    cmd == F_ADDFILESIGS_INFO) {
3993 			/*
3994 			 * The first element of the structure is an
3995 			 * off_t that happens to have the same size on
3996 			 * all archs. Let's overwrite that.
3997 			 */
3998 			off_t end_offset = 0;
3999 			if (blob) {
4000 				end_offset = blob->csb_end_offset;
4001 			}
4002 			error = copyout(&end_offset, argp, sizeof(end_offset));
4003 
4004 			if (error) {
4005 				vnode_put(vp);
4006 				goto outdrop;
4007 			}
4008 		}
4009 
4010 		if (cmd == F_ADDFILESIGS_INFO) {
4011 			/* Return information. What we copy out depends on the size of the
4012 			 * passed in structure, to keep binary compatibility. */
4013 
4014 			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
4015 				// enough room for fs_cdhash[20]+fs_hash_type
4016 
4017 				if (blob != NULL) {
4018 					error = copyout(blob->csb_cdhash,
4019 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
4020 					    USER_FSIGNATURES_CDHASH_LEN);
4021 					if (error) {
4022 						vnode_put(vp);
4023 						goto outdrop;
4024 					}
4025 					int hashtype = cs_hash_type(blob->csb_hashtype);
4026 					error = copyout(&hashtype,
4027 					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
4028 					    sizeof(int));
4029 					if (error) {
4030 						vnode_put(vp);
4031 						goto outdrop;
4032 					}
4033 				}
4034 			}
4035 		}
4036 
4037 		(void) vnode_put(vp);
4038 		break;
4039 	}
4040 #if CONFIG_SUPPLEMENTAL_SIGNATURES
4041 	case F_ADDFILESUPPL:
4042 	{
4043 		struct vnode *ivp;
4044 		struct cs_blob *blob = NULL;
4045 		struct user_fsupplement fs;
4046 		int orig_fd;
4047 		struct fileproc* orig_fp = NULL;
4048 		kern_return_t kr;
4049 		vm_offset_t kernel_blob_addr;
4050 		vm_size_t kernel_blob_size;
4051 
4052 		if (!IS_64BIT_PROCESS(p)) {
4053 			error = EINVAL;
4054 			goto out; // drop fp and unlock fds
4055 		}
4056 
4057 		if (fp->f_type != DTYPE_VNODE) {
4058 			error = EBADF;
4059 			goto out;
4060 		}
4061 
4062 		error = copyin(argp, &fs, sizeof(fs));
4063 		if (error) {
4064 			goto out;
4065 		}
4066 
4067 		orig_fd = fs.fs_orig_fd;
4068 		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
4069 			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
4070 			goto out;
4071 		}
4072 
4073 		if (orig_fp->f_type != DTYPE_VNODE) {
4074 			error = EBADF;
4075 			fp_drop(p, orig_fd, orig_fp, 1);
4076 			goto out;
4077 		}
4078 
4079 		ivp = (struct vnode *)fp_get_data(orig_fp);
4080 
4081 		vp = (struct vnode *)fp_get_data(fp);
4082 
4083 		proc_fdunlock(p);
4084 
4085 		error = vnode_getwithref(ivp);
4086 		if (error) {
4087 			fp_drop(p, orig_fd, orig_fp, 0);
4088 			goto outdrop; //drop fp
4089 		}
4090 
4091 		error = vnode_getwithref(vp);
4092 		if (error) {
4093 			vnode_put(ivp);
4094 			fp_drop(p, orig_fd, orig_fp, 0);
4095 			goto outdrop;
4096 		}
4097 
4098 		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
4099 			error = E2BIG;
4100 			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
4101 		}
4102 
4103 		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
4104 		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
4105 		if (kr != KERN_SUCCESS) {
4106 			error = ENOMEM;
4107 			goto dropboth;
4108 		}
4109 
4110 		int resid;
4111 		error = vn_rdwr(UIO_READ, vp,
4112 		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
4113 		    fs.fs_file_start + fs.fs_blob_start,
4114 		    UIO_SYSSPACE, 0,
4115 		    kauth_cred_get(), &resid, p);
4116 		if ((error == 0) && resid) {
4117 			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
4118 			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
4119 		}
4120 
4121 		if (error) {
4122 			ubc_cs_blob_deallocate(kernel_blob_addr,
4123 			    kernel_blob_size);
4124 			goto dropboth;
4125 		}
4126 
4127 		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
4128 		    &kernel_blob_addr, kernel_blob_size, &blob);
4129 
4130 		/* ubc_cs_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
4131 		if (error) {
4132 			if (kernel_blob_addr) {
4133 				ubc_cs_blob_deallocate(kernel_blob_addr,
4134 				    kernel_blob_size);
4135 			}
4136 			goto dropboth;
4137 		}
4138 		vnode_put(ivp);
4139 		vnode_put(vp);
4140 		fp_drop(p, orig_fd, orig_fp, 0);
4141 		break;
4142 
4143 dropboth:
4144 		vnode_put(ivp);
4145 		vnode_put(vp);
4146 		fp_drop(p, orig_fd, orig_fp, 0);
4147 		goto outdrop;
4148 	}
4149 #endif
4150 	case F_GETCODEDIR:
4151 	case F_FINDSIGS: {
4152 		error = ENOTSUP;
4153 		goto out;
4154 	}
4155 	case F_CHECK_LV: {
4156 		struct fileglob *fg;
4157 		fchecklv_t lv = {};
4158 
4159 		if (fp->f_type != DTYPE_VNODE) {
4160 			error = EBADF;
4161 			goto out;
4162 		}
4163 		fg = fp->fp_glob;
4164 		proc_fdunlock(p);
4165 
4166 		if (IS_64BIT_PROCESS(p)) {
4167 			error = copyin(argp, &lv, sizeof(lv));
4168 		} else {
4169 			struct user32_fchecklv lv32 = {};
4170 
4171 			error = copyin(argp, &lv32, sizeof(lv32));
4172 			lv.lv_file_start = lv32.lv_file_start;
4173 			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
4174 			lv.lv_error_message_size = lv32.lv_error_message_size;
4175 		}
4176 		if (error) {
4177 			goto outdrop;
4178 		}
4179 
4180 #if CONFIG_MACF
4181 		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
4182 		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
4183 #endif
4184 
4185 		break;
4186 	}
4187 	case F_GETSIGSINFO: {
4188 		struct cs_blob *blob = NULL;
4189 		fgetsigsinfo_t sigsinfo = {};
4190 
4191 		if (fp->f_type != DTYPE_VNODE) {
4192 			error = EBADF;
4193 			goto out;
4194 		}
4195 		vp = (struct vnode *)fp_get_data(fp);
4196 		proc_fdunlock(p);
4197 
4198 		error = vnode_getwithref(vp);
4199 		if (error) {
4200 			goto outdrop;
4201 		}
4202 
4203 		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
4204 		if (error) {
4205 			vnode_put(vp);
4206 			goto outdrop;
4207 		}
4208 
4209 		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
4210 		if (blob == NULL) {
4211 			error = ENOENT;
4212 			vnode_put(vp);
4213 			goto outdrop;
4214 		}
4215 		switch (sigsinfo.fg_info_request) {
4216 		case GETSIGSINFO_PLATFORM_BINARY:
4217 			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
4218 			error = copyout(&sigsinfo.fg_sig_is_platform,
4219 			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
4220 			    sizeof(sigsinfo.fg_sig_is_platform));
4221 			if (error) {
4222 				vnode_put(vp);
4223 				goto outdrop;
4224 			}
4225 			break;
4226 		default:
4227 			error = EINVAL;
4228 			vnode_put(vp);
4229 			goto outdrop;
4230 		}
4231 		vnode_put(vp);
4232 		break;
4233 	}
4234 #if CONFIG_PROTECT
4235 	case F_GETPROTECTIONCLASS: {
4236 		if (fp->f_type != DTYPE_VNODE) {
4237 			error = EBADF;
4238 			goto out;
4239 		}
4240 		vp = (struct vnode *)fp_get_data(fp);
4241 
4242 		proc_fdunlock(p);
4243 
4244 		if (vnode_getwithref(vp)) {
4245 			error = ENOENT;
4246 			goto outdrop;
4247 		}
4248 
4249 		struct vnode_attr va;
4250 
4251 		VATTR_INIT(&va);
4252 		VATTR_WANTED(&va, va_dataprotect_class);
4253 		error = VNOP_GETATTR(vp, &va, &context);
4254 		if (!error) {
4255 			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
4256 				*retval = va.va_dataprotect_class;
4257 			} else {
4258 				error = ENOTSUP;
4259 			}
4260 		}
4261 
4262 		vnode_put(vp);
4263 		break;
4264 	}
4265 
4266 	case F_SETPROTECTIONCLASS: {
4267 		/* tmp must be a valid PROTECTION_CLASS_* */
4268 		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4269 
4270 		if (fp->f_type != DTYPE_VNODE) {
4271 			error = EBADF;
4272 			goto out;
4273 		}
4274 		vp = (struct vnode *)fp_get_data(fp);
4275 
4276 		proc_fdunlock(p);
4277 
4278 		if (vnode_getwithref(vp)) {
4279 			error = ENOENT;
4280 			goto outdrop;
4281 		}
4282 
4283 		/* Only go forward if you have write access */
4284 		vfs_context_t ctx = vfs_context_current();
4285 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4286 			vnode_put(vp);
4287 			error = EBADF;
4288 			goto outdrop;
4289 		}
4290 
4291 		struct vnode_attr va;
4292 
4293 		VATTR_INIT(&va);
4294 		VATTR_SET(&va, va_dataprotect_class, tmp);
4295 
4296 		error = VNOP_SETATTR(vp, &va, ctx);
4297 
4298 		vnode_put(vp);
4299 		break;
4300 	}
4301 
4302 	case F_TRANSCODEKEY: {
4303 		if (fp->f_type != DTYPE_VNODE) {
4304 			error = EBADF;
4305 			goto out;
4306 		}
4307 
4308 		vp = (struct vnode *)fp_get_data(fp);
4309 		proc_fdunlock(p);
4310 
4311 		if (vnode_getwithref(vp)) {
4312 			error = ENOENT;
4313 			goto outdrop;
4314 		}
4315 
4316 		cp_key_t k = {
4317 			.len = CP_MAX_WRAPPEDKEYSIZE,
4318 		};
4319 
4320 		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
4321 		if (k.key == NULL) {
4322 			error = ENOMEM;
4323 		} else {
4324 			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
4325 		}
4326 
4327 		vnode_put(vp);
4328 
4329 		if (error == 0) {
4330 			error = copyout(k.key, argp, k.len);
4331 			*retval = k.len;
4332 		}
4333 		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);
4334 
4335 		break;
4336 	}
4337 
4338 	case F_GETPROTECTIONLEVEL:  {
4339 		if (fp->f_type != DTYPE_VNODE) {
4340 			error = EBADF;
4341 			goto out;
4342 		}
4343 
4344 		vp = (struct vnode*)fp_get_data(fp);
4345 		proc_fdunlock(p);
4346 
4347 		if (vnode_getwithref(vp)) {
4348 			error = ENOENT;
4349 			goto outdrop;
4350 		}
4351 
4352 		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
4353 
4354 		vnode_put(vp);
4355 		break;
4356 	}
4357 
4358 	case F_GETDEFAULTPROTLEVEL:  {
4359 		if (fp->f_type != DTYPE_VNODE) {
4360 			error = EBADF;
4361 			goto out;
4362 		}
4363 
4364 		vp = (struct vnode*)fp_get_data(fp);
4365 		proc_fdunlock(p);
4366 
4367 		if (vnode_getwithref(vp)) {
4368 			error = ENOENT;
4369 			goto outdrop;
4370 		}
4371 
4372 		/*
4373 		 * if cp_get_major_vers fails, error will be set to proper errno
4374 		 * and cp_version will still be 0.
4375 		 */
4376 
4377 		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
4378 
4379 		vnode_put(vp);
4380 		break;
4381 	}
4382 
4383 #endif /* CONFIG_PROTECT */
4384 
4385 	case F_MOVEDATAEXTENTS: {
4386 		struct fileproc *fp2 = NULL;
4387 		struct vnode *src_vp = NULLVP;
4388 		struct vnode *dst_vp = NULLVP;
4389 		/* We need to grab the 2nd FD out of the arguments before moving on. */
4390 		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
4391 
4392 		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
4393 		if (error) {
4394 			goto out;
4395 		}
4396 
4397 		if (fp->f_type != DTYPE_VNODE) {
4398 			error = EBADF;
4399 			goto out;
4400 		}
4401 
4402 		/*
4403 		 * For now, special case HFS+ and APFS only, since this
4404 		 * is SPI.
4405 		 */
4406 		src_vp = (struct vnode *)fp_get_data(fp);
4407 		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
4408 			error = ENOTSUP;
4409 			goto out;
4410 		}
4411 
4412 		/*
4413 		 * Get the references before we start acquiring iocounts on the vnodes,
4414 		 * while we still hold the proc fd lock
4415 		 */
4416 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4417 			error = EBADF;
4418 			goto out;
4419 		}
4420 		if (fp2->f_type != DTYPE_VNODE) {
4421 			fp_drop(p, fd2, fp2, 1);
4422 			error = EBADF;
4423 			goto out;
4424 		}
4425 		dst_vp = (struct vnode *)fp_get_data(fp2);
4426 		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
4427 			fp_drop(p, fd2, fp2, 1);
4428 			error = ENOTSUP;
4429 			goto out;
4430 		}
4431 
4432 #if CONFIG_MACF
4433 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4434 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4435 		if (error) {
4436 			fp_drop(p, fd2, fp2, 1);
4437 			goto out;
4438 		}
4439 #endif
4440 		/* Audit the 2nd FD */
4441 		AUDIT_ARG(fd, fd2);
4442 
4443 		proc_fdunlock(p);
4444 
4445 		if (vnode_getwithref(src_vp)) {
4446 			fp_drop(p, fd2, fp2, 0);
4447 			error = ENOENT;
4448 			goto outdrop;
4449 		}
4450 		if (vnode_getwithref(dst_vp)) {
4451 			vnode_put(src_vp);
4452 			fp_drop(p, fd2, fp2, 0);
4453 			error = ENOENT;
4454 			goto outdrop;
4455 		}
4456 
4457 		/*
4458 		 * Basic asserts; validate they are not the same and that
4459 		 * both live on the same filesystem.
4460 		 */
4461 		if (dst_vp == src_vp) {
4462 			vnode_put(src_vp);
4463 			vnode_put(dst_vp);
4464 			fp_drop(p, fd2, fp2, 0);
4465 			error = EINVAL;
4466 			goto outdrop;
4467 		}
4468 
4469 		if (dst_vp->v_mount != src_vp->v_mount) {
4470 			vnode_put(src_vp);
4471 			vnode_put(dst_vp);
4472 			fp_drop(p, fd2, fp2, 0);
4473 			error = EXDEV;
4474 			goto outdrop;
4475 		}
4476 
4477 		/* Now we have a legit pair of FDs.  Go to work */
4478 
4479 		/* Now check for write access to the target files */
4480 		if (vnode_authorize(src_vp, NULLVP,
4481 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4482 			vnode_put(src_vp);
4483 			vnode_put(dst_vp);
4484 			fp_drop(p, fd2, fp2, 0);
4485 			error = EBADF;
4486 			goto outdrop;
4487 		}
4488 
4489 		if (vnode_authorize(dst_vp, NULLVP,
4490 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4491 			vnode_put(src_vp);
4492 			vnode_put(dst_vp);
4493 			fp_drop(p, fd2, fp2, 0);
4494 			error = EBADF;
4495 			goto outdrop;
4496 		}
4497 
4498 		/* Verify that both vps point to files and not directories */
4499 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4500 			error = EINVAL;
4501 			vnode_put(src_vp);
4502 			vnode_put(dst_vp);
4503 			fp_drop(p, fd2, fp2, 0);
4504 			goto outdrop;
4505 		}
4506 
4507 		/*
4508 		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
4509 		 * We'll pass in our special bit indicating that the new behavior is expected
4510 		 */
4511 
4512 		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
4513 
4514 		vnode_put(src_vp);
4515 		vnode_put(dst_vp);
4516 		fp_drop(p, fd2, fp2, 0);
4517 		break;
4518 	}
4519 
4520 	case F_TRANSFEREXTENTS: {
4521 		struct fileproc *fp2 = NULL;
4522 		struct vnode *src_vp = NULLVP;
4523 		struct vnode *dst_vp = NULLVP;
4524 
4525 		/* Get 2nd FD out of the arguments. */
4526 		int fd2 = CAST_DOWN_EXPLICIT(int, uap->arg);
4527 		if (fd2 < 0) {
4528 			error = EINVAL;
4529 			goto out;
4530 		}
4531 
4532 		if (fp->f_type != DTYPE_VNODE) {
4533 			error = EBADF;
4534 			goto out;
4535 		}
4536 
4537 		/*
4538 		 * Only allow this for APFS
4539 		 */
4540 		src_vp = (struct vnode *)fp_get_data(fp);
4541 		if (src_vp->v_tag != VT_APFS) {
4542 			error = ENOTSUP;
4543 			goto out;
4544 		}
4545 
4546 		/*
4547 		 * Get the references before we start acquiring iocounts on the vnodes,
4548 		 * while we still hold the proc fd lock
4549 		 */
4550 		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
4551 			error = EBADF;
4552 			goto out;
4553 		}
4554 		if (fp2->f_type != DTYPE_VNODE) {
4555 			fp_drop(p, fd2, fp2, 1);
4556 			error = EBADF;
4557 			goto out;
4558 		}
4559 		dst_vp = (struct vnode *)fp_get_data(fp2);
4560 		if (dst_vp->v_tag != VT_APFS) {
4561 			fp_drop(p, fd2, fp2, 1);
4562 			error = ENOTSUP;
4563 			goto out;
4564 		}
4565 
4566 #if CONFIG_MACF
4567 		/* Re-do MAC checks against the new FD, pass in a fake argument */
4568 		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
4569 		if (error) {
4570 			fp_drop(p, fd2, fp2, 1);
4571 			goto out;
4572 		}
4573 #endif
4574 		/* Audit the 2nd FD */
4575 		AUDIT_ARG(fd, fd2);
4576 
4577 		proc_fdunlock(p);
4578 
4579 		if (vnode_getwithref(src_vp)) {
4580 			fp_drop(p, fd2, fp2, 0);
4581 			error = ENOENT;
4582 			goto outdrop;
4583 		}
4584 		if (vnode_getwithref(dst_vp)) {
4585 			vnode_put(src_vp);
4586 			fp_drop(p, fd2, fp2, 0);
4587 			error = ENOENT;
4588 			goto outdrop;
4589 		}
4590 
4591 		/*
4592 		 * Validate they are not the same and that
4593 		 * both live on the same filesystem.
4594 		 */
4595 		if (dst_vp == src_vp) {
4596 			vnode_put(src_vp);
4597 			vnode_put(dst_vp);
4598 			fp_drop(p, fd2, fp2, 0);
4599 			error = EINVAL;
4600 			goto outdrop;
4601 		}
4602 		if (dst_vp->v_mount != src_vp->v_mount) {
4603 			vnode_put(src_vp);
4604 			vnode_put(dst_vp);
4605 			fp_drop(p, fd2, fp2, 0);
4606 			error = EXDEV;
4607 			goto outdrop;
4608 		}
4609 
4610 		/* Verify that both vps point to files and not directories */
4611 		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
4612 			error = EINVAL;
4613 			vnode_put(src_vp);
4614 			vnode_put(dst_vp);
4615 			fp_drop(p, fd2, fp2, 0);
4616 			goto outdrop;
4617 		}
4618 
4619 
4620 		/*
4621 		 * Okay, vps are legit. Check  access.  We'll require write access
4622 		 * to both files.
4623 		 */
4624 		if (vnode_authorize(src_vp, NULLVP,
4625 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4626 			vnode_put(src_vp);
4627 			vnode_put(dst_vp);
4628 			fp_drop(p, fd2, fp2, 0);
4629 			error = EBADF;
4630 			goto outdrop;
4631 		}
4632 		if (vnode_authorize(dst_vp, NULLVP,
4633 		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
4634 			vnode_put(src_vp);
4635 			vnode_put(dst_vp);
4636 			fp_drop(p, fd2, fp2, 0);
4637 			error = EBADF;
4638 			goto outdrop;
4639 		}
4640 
4641 		/* Pass it on through to the fs */
4642 		error = VNOP_IOCTL(src_vp, cmd, (caddr_t)dst_vp, 0, &context);
4643 
4644 		vnode_put(src_vp);
4645 		vnode_put(dst_vp);
4646 		fp_drop(p, fd2, fp2, 0);
4647 		break;
4648 	}
4649 
4650 	/*
4651 	 * SPI for making a file compressed.
4652 	 */
4653 	case F_MAKECOMPRESSED: {
4654 		uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
4655 
4656 		if (fp->f_type != DTYPE_VNODE) {
4657 			error = EBADF;
4658 			goto out;
4659 		}
4660 
4661 		vp = (struct vnode*)fp_get_data(fp);
4662 		proc_fdunlock(p);
4663 
4664 		/* get the vnode */
4665 		if (vnode_getwithref(vp)) {
4666 			error = ENOENT;
4667 			goto outdrop;
4668 		}
4669 
4670 		/* Is it a file? */
4671 		if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
4672 			vnode_put(vp);
4673 			error = EBADF;
4674 			goto outdrop;
4675 		}
4676 
4677 		/* invoke ioctl to pass off to FS */
4678 		/* Only go forward if you have write access */
4679 		vfs_context_t ctx = vfs_context_current();
4680 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4681 			vnode_put(vp);
4682 			error = EBADF;
4683 			goto outdrop;
4684 		}
4685 
4686 		error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);
4687 
4688 		vnode_put(vp);
4689 		break;
4690 	}
4691 
4692 	/*
4693 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4694 	 * the open FD will be written to the Fastflow.
4695 	 */
4696 	case F_SET_GREEDY_MODE:
4697 	/* intentionally drop through to the same handler as F_SETSTATIC.
4698 	 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4699 	 */
4700 
4701 	/*
4702 	 * SPI (private) for indicating to a filesystem that subsequent writes to
4703 	 * the open FD will represent static content.
4704 	 */
4705 	case F_SETSTATICCONTENT: {
4706 		caddr_t ioctl_arg = NULL;
4707 
4708 		if (uap->arg) {
4709 			ioctl_arg = (caddr_t) 1;
4710 		}
4711 
4712 		if (fp->f_type != DTYPE_VNODE) {
4713 			error = EBADF;
4714 			goto out;
4715 		}
4716 		vp = (struct vnode *)fp_get_data(fp);
4717 		proc_fdunlock(p);
4718 
4719 		error = vnode_getwithref(vp);
4720 		if (error) {
4721 			error = ENOENT;
4722 			goto outdrop;
4723 		}
4724 
4725 		/* Only go forward if you have write access */
4726 		vfs_context_t ctx = vfs_context_current();
4727 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4728 			vnode_put(vp);
4729 			error = EBADF;
4730 			goto outdrop;
4731 		}
4732 
4733 		error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
4734 		(void)vnode_put(vp);
4735 
4736 		break;
4737 	}
4738 
4739 	/*
4740 	 * SPI (private) for indicating to the lower level storage driver that the
4741 	 * subsequent writes should be of a particular IO type (burst, greedy, static),
4742 	 * or other flavors that may be necessary.
4743 	 */
4744 	case F_SETIOTYPE: {
4745 		caddr_t param_ptr;
4746 		uint32_t param;
4747 
4748 		if (uap->arg) {
4749 			/* extract 32 bits of flags from userland */
4750 			param_ptr = (caddr_t) uap->arg;
4751 			param = (uint32_t) param_ptr;
4752 		} else {
4753 			/* If no argument is specified, error out */
4754 			error = EINVAL;
4755 			goto out;
4756 		}
4757 
4758 		/*
4759 		 * Validate the different types of flags that can be specified:
4760 		 * all of them are mutually exclusive for now.
4761 		 */
4762 		switch (param) {
4763 		case F_IOTYPE_ISOCHRONOUS:
4764 			break;
4765 
4766 		default:
4767 			error = EINVAL;
4768 			goto out;
4769 		}
4770 
4771 
4772 		if (fp->f_type != DTYPE_VNODE) {
4773 			error = EBADF;
4774 			goto out;
4775 		}
4776 		vp = (struct vnode *)fp_get_data(fp);
4777 		proc_fdunlock(p);
4778 
4779 		error = vnode_getwithref(vp);
4780 		if (error) {
4781 			error = ENOENT;
4782 			goto outdrop;
4783 		}
4784 
4785 		/* Only go forward if you have write access */
4786 		vfs_context_t ctx = vfs_context_current();
4787 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4788 			vnode_put(vp);
4789 			error = EBADF;
4790 			goto outdrop;
4791 		}
4792 
4793 		error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
4794 		(void)vnode_put(vp);
4795 
4796 		break;
4797 	}
4798 
4799 	/*
4800 	 * Set the vnode pointed to by 'fd'
4801 	 * and tag it as the (potentially future) backing store
4802 	 * for another filesystem
4803 	 */
4804 	case F_SETBACKINGSTORE: {
4805 		if (fp->f_type != DTYPE_VNODE) {
4806 			error = EBADF;
4807 			goto out;
4808 		}
4809 
4810 		vp = (struct vnode *)fp_get_data(fp);
4811 
4812 		if (vp->v_tag != VT_HFS) {
4813 			error = EINVAL;
4814 			goto out;
4815 		}
4816 		proc_fdunlock(p);
4817 
4818 		if (vnode_getwithref(vp)) {
4819 			error = ENOENT;
4820 			goto outdrop;
4821 		}
4822 
4823 		/* only proceed if you have write access */
4824 		vfs_context_t ctx = vfs_context_current();
4825 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4826 			vnode_put(vp);
4827 			error = EBADF;
4828 			goto outdrop;
4829 		}
4830 
4831 
4832 		/* If arg != 0, set, otherwise unset */
4833 		if (uap->arg) {
4834 			error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
4835 		} else {
4836 			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
4837 		}
4838 
4839 		vnode_put(vp);
4840 		break;
4841 	}
4842 
4843 	/*
4844 	 * like F_GETPATH, but special semantics for
4845 	 * the mobile time machine handler.
4846 	 */
4847 	case F_GETPATH_MTMINFO: {
4848 		char *pathbufp;
4849 		int pathlen;
4850 
4851 		if (fp->f_type != DTYPE_VNODE) {
4852 			error = EBADF;
4853 			goto out;
4854 		}
4855 		vp = (struct vnode *)fp_get_data(fp);
4856 		proc_fdunlock(p);
4857 
4858 		pathlen = MAXPATHLEN;
4859 		pathbufp = zalloc(ZV_NAMEI);
4860 
4861 		if ((error = vnode_getwithref(vp)) == 0) {
4862 			int backingstore = 0;
4863 
4864 			/* Check for error from vn_getpath before moving on */
4865 			if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4866 				if (vp->v_tag == VT_HFS) {
4867 					error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4868 				}
4869 				(void)vnode_put(vp);
4870 
4871 				if (error == 0) {
4872 					error = copyout((caddr_t)pathbufp, argp, pathlen);
4873 				}
4874 				if (error == 0) {
4875 					/*
4876 					 * If the copyout was successful, now check to ensure
4877 					 * that this vnode is not a BACKINGSTORE vnode.  mtmd
4878 					 * wants the path regardless.
4879 					 */
4880 					if (backingstore) {
4881 						error = EBUSY;
4882 					}
4883 				}
4884 			} else {
4885 				(void)vnode_put(vp);
4886 			}
4887 		}
4888 
4889 		zfree(ZV_NAMEI, pathbufp);
4890 		goto outdrop;
4891 	}
4892 
4893 	case F_RECYCLE: {
4894 #if !DEBUG && !DEVELOPMENT
4895 		bool allowed = false;
4896 
4897 		//
4898 		// non-debug and non-development kernels have restrictions
4899 		// on who can call this fcntl.  the process has to be marked
4900 		// with the dataless-manipulator entitlement and either the
4901 		// process or thread have to be marked rapid-aging.
4902 		//
4903 		if (!vfs_context_is_dataless_manipulator(&context)) {
4904 			error = EPERM;
4905 			goto out;
4906 		}
4907 
4908 		proc_t proc = vfs_context_proc(&context);
4909 		if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4910 			allowed = true;
4911 		} else {
4912 			thread_t thr = vfs_context_thread(&context);
4913 			if (thr) {
4914 				struct uthread *ut = get_bsdthread_info(thr);
4915 
4916 				if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4917 					allowed = true;
4918 				}
4919 			}
4920 		}
4921 		if (!allowed) {
4922 			error = EPERM;
4923 			goto out;
4924 		}
4925 #endif
4926 
4927 		if (fp->f_type != DTYPE_VNODE) {
4928 			error = EBADF;
4929 			goto out;
4930 		}
4931 		vp = (struct vnode *)fp_get_data(fp);
4932 		proc_fdunlock(p);
4933 
4934 		vnode_recycle(vp);
4935 		break;
4936 	}
4937 
4938 #if CONFIG_FILE_LEASES
4939 	case F_SETLEASE: {
4940 		struct fileglob *fg;
4941 		int fl_type;
4942 		int expcounts;
4943 
4944 		if (fp->f_type != DTYPE_VNODE) {
4945 			error = EBADF;
4946 			goto out;
4947 		}
4948 		vp = (struct vnode *)fp_get_data(fp);
4949 		fg = fp->fp_glob;;
4950 		proc_fdunlock(p);
4951 
4952 		/*
4953 		 * In order to allow a process to avoid breaking
4954 		 * its own leases, the expected open count needs
4955 		 * to be provided to F_SETLEASE when placing write lease.
4956 		 * Similarly, in order to allow a process to place a read lease
4957 		 * after opening the file multiple times in RW mode, the expected
4958 		 * write count needs to be provided to F_SETLEASE when placing a
4959 		 * read lease.
4960 		 *
4961 		 * We use the upper 30 bits of the integer argument (way more than
4962 		 * enough) as the expected open/write count.
4963 		 *
4964 		 * If the caller passed 0 for the expected open count,
4965 		 * assume 1.
4966 		 */
4967 		fl_type = CAST_DOWN_EXPLICIT(int, uap->arg);
4968 		expcounts = (unsigned int)fl_type >> 2;
4969 		fl_type &= 3;
4970 
4971 		if (fl_type == F_WRLCK && expcounts == 0) {
4972 			expcounts = 1;
4973 		}
4974 
4975 		AUDIT_ARG(value32, fl_type);
4976 
4977 		if ((error = vnode_getwithref(vp))) {
4978 			goto outdrop;
4979 		}
4980 
4981 		/*
4982 		 * Only support for regular file/dir mounted on local-based filesystem.
4983 		 */
4984 		if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
4985 		    !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
4986 			error = EBADF;
4987 			vnode_put(vp);
4988 			goto outdrop;
4989 		}
4990 
4991 		/* For directory, we only support read lease. */
4992 		if (vnode_vtype(vp) == VDIR && fl_type == F_WRLCK) {
4993 			error = ENOTSUP;
4994 			vnode_put(vp);
4995 			goto outdrop;
4996 		}
4997 
4998 		switch (fl_type) {
4999 		case F_RDLCK:
5000 		case F_WRLCK:
5001 		case F_UNLCK:
5002 			error = vnode_setlease(vp, fg, fl_type, expcounts,
5003 			    vfs_context_current());
5004 			break;
5005 		default:
5006 			error = EINVAL;
5007 			break;
5008 		}
5009 
5010 		vnode_put(vp);
5011 		goto outdrop;
5012 	}
5013 
5014 	case F_GETLEASE: {
5015 		if (fp->f_type != DTYPE_VNODE) {
5016 			error = EBADF;
5017 			goto out;
5018 		}
5019 		vp = (struct vnode *)fp_get_data(fp);
5020 		proc_fdunlock(p);
5021 
5022 		if ((error = vnode_getwithref(vp))) {
5023 			goto outdrop;
5024 		}
5025 
5026 		if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5027 		    !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
5028 			error = EBADF;
5029 			vnode_put(vp);
5030 			goto outdrop;
5031 		}
5032 
5033 		error = 0;
5034 		*retval = vnode_getlease(vp);
5035 		vnode_put(vp);
5036 		goto outdrop;
5037 	}
5038 #endif /* CONFIG_FILE_LEASES */
5039 
5040 	/* SPI (private) for asserting background access to a file */
5041 	case F_ASSERT_BG_ACCESS:
5042 	/* SPI (private) for releasing background access to a file */
5043 	case F_RELEASE_BG_ACCESS: {
5044 		/*
5045 		 * Check if the process is platform code, which means
5046 		 * that it is considered part of the Operating System.
5047 		 */
5048 		if (!csproc_get_platform_binary(p)) {
5049 			error = EPERM;
5050 			goto out;
5051 		}
5052 
5053 		if (fp->f_type != DTYPE_VNODE) {
5054 			error = EBADF;
5055 			goto out;
5056 		}
5057 
5058 		vp = (struct vnode *)fp_get_data(fp);
5059 		proc_fdunlock(p);
5060 
5061 		if (vnode_getwithref(vp)) {
5062 			error = ENOENT;
5063 			goto outdrop;
5064 		}
5065 
5066 		/* Verify that vp points to a file and not a directory */
5067 		if (!vnode_isreg(vp)) {
5068 			vnode_put(vp);
5069 			error = EINVAL;
5070 			goto outdrop;
5071 		}
5072 
5073 		/* Only proceed if you have write access */
5074 		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
5075 			vnode_put(vp);
5076 			error = EBADF;
5077 			goto outdrop;
5078 		}
5079 
5080 		if (cmd == F_ASSERT_BG_ACCESS) {
5081 			fassertbgaccess_t args;
5082 
5083 			if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
5084 				vnode_put(vp);
5085 				goto outdrop;
5086 			}
5087 
5088 			error = VNOP_IOCTL(vp, F_ASSERT_BG_ACCESS, (caddr_t)&args, 0, &context);
5089 		} else {
5090 			// cmd == F_RELEASE_BG_ACCESS
5091 			error = VNOP_IOCTL(vp, F_RELEASE_BG_ACCESS, (caddr_t)NULL, 0, &context);
5092 		}
5093 
5094 		vnode_put(vp);
5095 
5096 		goto outdrop;
5097 	}
5098 
5099 	default:
5100 		/*
5101 		 * This is an fcntl() that we do not recognize at this level;
5102 		 * if this is a vnode, we send it down into the VNOP_IOCTL
5103 		 * for this vnode; this can include special devices, and will
5104 		 * effectively overload fcntl() to send ioctl()'s.
5105 		 */
5106 		if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
5107 			error = EINVAL;
5108 			goto out;
5109 		}
5110 
5111 		/* Catch any now-invalid fcntl() selectors */
5112 		switch (cmd) {
5113 		case (int)APFSIOC_REVERT_TO_SNAPSHOT:
5114 		case (int)FSIOC_FIOSEEKHOLE:
5115 		case (int)FSIOC_FIOSEEKDATA:
5116 		case (int)FSIOC_CAS_BSDFLAGS:
5117 		case (int)FSIOC_AUTH_FS:
5118 		case HFS_GET_BOOT_INFO:
5119 		case HFS_SET_BOOT_INFO:
5120 		case FIOPINSWAP:
5121 		case F_MARKDEPENDENCY:
5122 		case TIOCREVOKE:
5123 		case TIOCREVOKECLEAR:
5124 			error = EINVAL;
5125 			goto out;
5126 		default:
5127 			break;
5128 		}
5129 
5130 		if (fp->f_type != DTYPE_VNODE) {
5131 			error = EBADF;
5132 			goto out;
5133 		}
5134 		vp = (struct vnode *)fp_get_data(fp);
5135 		proc_fdunlock(p);
5136 
5137 		if ((error = vnode_getwithref(vp)) == 0) {
5138 #define STK_PARAMS 128
5139 			char stkbuf[STK_PARAMS] = {0};
5140 			unsigned int size;
5141 			caddr_t data, memp;
5142 			/*
5143 			 * For this to work properly, we have to copy in the
5144 			 * ioctl() cmd argument if there is one; we must also
5145 			 * check that a command parameter, if present, does
5146 			 * not exceed the maximum command length dictated by
5147 			 * the number of bits we have available in the command
5148 			 * to represent a structure length.  Finally, we have
5149 			 * to copy the results back out, if it is that type of
5150 			 * ioctl().
5151 			 */
5152 			size = IOCPARM_LEN(cmd);
5153 			if (size > IOCPARM_MAX) {
5154 				(void)vnode_put(vp);
5155 				error = EINVAL;
5156 				break;
5157 			}
5158 
5159 			memp = NULL;
5160 			if (size > sizeof(stkbuf)) {
5161 				memp = (caddr_t)kalloc_data(size, Z_WAITOK);
5162 				if (memp == 0) {
5163 					(void)vnode_put(vp);
5164 					error = ENOMEM;
5165 					goto outdrop;
5166 				}
5167 				data = memp;
5168 			} else {
5169 				data = &stkbuf[0];
5170 			}
5171 
5172 			if (cmd & IOC_IN) {
5173 				if (size) {
5174 					/* structure */
5175 					error = copyin(argp, data, size);
5176 					if (error) {
5177 						(void)vnode_put(vp);
5178 						if (memp) {
5179 							kfree_data(memp, size);
5180 						}
5181 						goto outdrop;
5182 					}
5183 
5184 					/* Bzero the section beyond that which was needed */
5185 					if (size <= sizeof(stkbuf)) {
5186 						bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
5187 					}
5188 				} else {
5189 					/* int */
5190 					if (is64bit) {
5191 						*(user_addr_t *)data = argp;
5192 					} else {
5193 						*(uint32_t *)data = (uint32_t)argp;
5194 					}
5195 				};
5196 			} else if ((cmd & IOC_OUT) && size) {
5197 				/*
5198 				 * Zero the buffer so the user always
5199 				 * gets back something deterministic.
5200 				 */
5201 				bzero(data, size);
5202 			} else if (cmd & IOC_VOID) {
5203 				if (is64bit) {
5204 					*(user_addr_t *)data = argp;
5205 				} else {
5206 					*(uint32_t *)data = (uint32_t)argp;
5207 				}
5208 			}
5209 
5210 			error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
5211 
5212 			(void)vnode_put(vp);
5213 
5214 			/* Copy any output data to user */
5215 			if (error == 0 && (cmd & IOC_OUT) && size) {
5216 				error = copyout(data, argp, size);
5217 			}
5218 			if (memp) {
5219 				kfree_data(memp, size);
5220 			}
5221 		}
5222 		break;
5223 	}
5224 
5225 outdrop:
5226 	return sys_fcntl_outdrop(p, fd, fp, vp, error);
5227 
5228 out:
5229 	return sys_fcntl_out(p, fd, fp, error);
5230 }
5231 
5232 
5233 /*
5234  * sys_close
5235  *
5236  * Description:	The implementation of the close(2) system call
5237  *
5238  * Parameters:	p			Process in whose per process file table
5239  *					the close is to occur
5240  *		uap->fd			fd to be closed
5241  *		retval			<unused>
5242  *
5243  * Returns:	0			Success
5244  *	fp_lookup:EBADF			Bad file descriptor
5245  *      fp_guard_exception:???          Guarded file descriptor
5246  *	close_internal:EBADF
5247  *	close_internal:???              Anything returnable by a per-fileops
5248  *					close function
5249  */
5250 int
sys_close(proc_t p,struct close_args * uap,__unused int32_t * retval)5251 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
5252 {
5253 	__pthread_testcancel(1);
5254 	return close_nocancel(p, uap->fd);
5255 }
5256 
5257 int
sys_close_nocancel(proc_t p,struct close_nocancel_args * uap,__unused int32_t * retval)5258 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
5259 {
5260 	return close_nocancel(p, uap->fd);
5261 }
5262 
5263 int
close_nocancel(proc_t p,int fd)5264 close_nocancel(proc_t p, int fd)
5265 {
5266 	struct fileproc *fp;
5267 
5268 	AUDIT_SYSCLOSE(p, fd);
5269 
5270 	proc_fdlock(p);
5271 	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
5272 		proc_fdunlock(p);
5273 		return EBADF;
5274 	}
5275 
5276 	if (fp_isguarded(fp, GUARD_CLOSE)) {
5277 		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
5278 		proc_fdunlock(p);
5279 		return error;
5280 	}
5281 
5282 	return fp_close_and_unlock(p, fd, fp, 0);
5283 }
5284 
5285 
5286 /*
5287  * fstat
5288  *
5289  * Description:	Return status information about a file descriptor.
5290  *
5291  * Parameters:	p				The process doing the fstat
5292  *		fd				The fd to stat
5293  *		ub				The user stat buffer
5294  *		xsecurity			The user extended security
5295  *						buffer, or 0 if none
5296  *		xsecurity_size			The size of xsecurity, or 0
5297  *						if no xsecurity
5298  *		isstat64			Flag to indicate 64 bit version
5299  *						for inode size, etc.
5300  *
5301  * Returns:	0				Success
5302  *		EBADF
5303  *		EFAULT
5304  *	fp_lookup:EBADF				Bad file descriptor
5305  *	vnode_getwithref:???
5306  *	copyout:EFAULT
5307  *	vnode_getwithref:???
5308  *	vn_stat:???
5309  *	soo_stat:???
5310  *	pipe_stat:???
5311  *	pshm_stat:???
5312  *	kqueue_stat:???
5313  *
5314  * Notes:	Internal implementation for all other fstat() related
5315  *		functions
5316  *
5317  *		XXX switch on node type is bogus; need a stat in struct
5318  *		XXX fileops instead.
5319  */
5320 static int
fstat(proc_t p,int fd,user_addr_t ub,user_addr_t xsecurity,user_addr_t xsecurity_size,int isstat64)5321 fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
5322     user_addr_t xsecurity_size, int isstat64)
5323 {
5324 	struct fileproc *fp;
5325 	union {
5326 		struct stat sb;
5327 		struct stat64 sb64;
5328 	} source;
5329 	union {
5330 		struct user64_stat user64_sb;
5331 		struct user32_stat user32_sb;
5332 		struct user64_stat64 user64_sb64;
5333 		struct user32_stat64 user32_sb64;
5334 	} dest;
5335 	int error, my_size;
5336 	file_type_t type;
5337 	caddr_t data;
5338 	kauth_filesec_t fsec;
5339 	user_size_t xsecurity_bufsize;
5340 	vfs_context_t ctx = vfs_context_current();
5341 	void * sbptr;
5342 
5343 
5344 	AUDIT_ARG(fd, fd);
5345 
5346 	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
5347 		return error;
5348 	}
5349 	type = fp->f_type;
5350 	data = (caddr_t)fp_get_data(fp);
5351 	fsec = KAUTH_FILESEC_NONE;
5352 
5353 	sbptr = (void *)&source;
5354 
5355 	switch (type) {
5356 	case DTYPE_VNODE:
5357 		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
5358 			/*
5359 			 * If the caller has the file open, and is not
5360 			 * requesting extended security information, we are
5361 			 * going to let them get the basic stat information.
5362 			 */
5363 			if (xsecurity == USER_ADDR_NULL) {
5364 				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
5365 				    fp->fp_glob->fg_cred);
5366 			} else {
5367 				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
5368 			}
5369 
5370 			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
5371 			(void)vnode_put((vnode_t)data);
5372 		}
5373 		break;
5374 
5375 #if SOCKETS
5376 	case DTYPE_SOCKET:
5377 		error = soo_stat((struct socket *)data, sbptr, isstat64);
5378 		break;
5379 #endif /* SOCKETS */
5380 
5381 	case DTYPE_PIPE:
5382 		error = pipe_stat((void *)data, sbptr, isstat64);
5383 		break;
5384 
5385 	case DTYPE_PSXSHM:
5386 		error = pshm_stat((void *)data, sbptr, isstat64);
5387 		break;
5388 
5389 	case DTYPE_KQUEUE:
5390 		error = kqueue_stat((void *)data, sbptr, isstat64, p);
5391 		break;
5392 
5393 	default:
5394 		error = EBADF;
5395 		goto out;
5396 	}
5397 	if (error == 0) {
5398 		caddr_t sbp;
5399 
5400 		if (isstat64 != 0) {
5401 			source.sb64.st_lspare = 0;
5402 			source.sb64.st_qspare[0] = 0LL;
5403 			source.sb64.st_qspare[1] = 0LL;
5404 
5405 			if (IS_64BIT_PROCESS(p)) {
5406 				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5407 				my_size = sizeof(dest.user64_sb64);
5408 				sbp = (caddr_t)&dest.user64_sb64;
5409 			} else {
5410 				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5411 				my_size = sizeof(dest.user32_sb64);
5412 				sbp = (caddr_t)&dest.user32_sb64;
5413 			}
5414 		} else {
5415 			source.sb.st_lspare = 0;
5416 			source.sb.st_qspare[0] = 0LL;
5417 			source.sb.st_qspare[1] = 0LL;
5418 			if (IS_64BIT_PROCESS(p)) {
5419 				munge_user64_stat(&source.sb, &dest.user64_sb);
5420 				my_size = sizeof(dest.user64_sb);
5421 				sbp = (caddr_t)&dest.user64_sb;
5422 			} else {
5423 				munge_user32_stat(&source.sb, &dest.user32_sb);
5424 				my_size = sizeof(dest.user32_sb);
5425 				sbp = (caddr_t)&dest.user32_sb;
5426 			}
5427 		}
5428 
5429 		error = copyout(sbp, ub, my_size);
5430 	}
5431 
5432 	/* caller wants extended security information? */
5433 	if (xsecurity != USER_ADDR_NULL) {
5434 		/* did we get any? */
5435 		if (fsec == KAUTH_FILESEC_NONE) {
5436 			if (susize(xsecurity_size, 0) != 0) {
5437 				error = EFAULT;
5438 				goto out;
5439 			}
5440 		} else {
5441 			/* find the user buffer size */
5442 			xsecurity_bufsize = fusize(xsecurity_size);
5443 
5444 			/* copy out the actual data size */
5445 			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5446 				error = EFAULT;
5447 				goto out;
5448 			}
5449 
5450 			/* if the caller supplied enough room, copy out to it */
5451 			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
5452 				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5453 			}
5454 		}
5455 	}
5456 out:
5457 	fp_drop(p, fd, fp, 0);
5458 	if (fsec != NULL) {
5459 		kauth_filesec_free(fsec);
5460 	}
5461 	return error;
5462 }
5463 
5464 
5465 /*
5466  * sys_fstat_extended
5467  *
5468  * Description:	Extended version of fstat supporting returning extended
5469  *		security information
5470  *
5471  * Parameters:	p				The process doing the fstat
5472  *		uap->fd				The fd to stat
5473  *		uap->ub				The user stat buffer
5474  *		uap->xsecurity			The user extended security
5475  *						buffer, or 0 if none
5476  *		uap->xsecurity_size		The size of xsecurity, or 0
5477  *
5478  * Returns:	0				Success
5479  *		!0				Errno (see fstat)
5480  */
5481 int
sys_fstat_extended(proc_t p,struct fstat_extended_args * uap,__unused int32_t * retval)5482 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
5483 {
5484 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
5485 }
5486 
5487 
5488 /*
5489  * sys_fstat
5490  *
5491  * Description:	Get file status for the file associated with fd
5492  *
5493  * Parameters:	p				The process doing the fstat
5494  *		uap->fd				The fd to stat
5495  *		uap->ub				The user stat buffer
5496  *
5497  * Returns:	0				Success
5498  *		!0				Errno (see fstat)
5499  */
5500 int
sys_fstat(proc_t p,struct fstat_args * uap,__unused int32_t * retval)5501 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
5502 {
5503 	return fstat(p, uap->fd, uap->ub, 0, 0, 0);
5504 }
5505 
5506 
5507 /*
5508  * sys_fstat64_extended
5509  *
5510  * Description:	Extended version of fstat64 supporting returning extended
5511  *		security information
5512  *
5513  * Parameters:	p				The process doing the fstat
5514  *		uap->fd				The fd to stat
5515  *		uap->ub				The user stat buffer
5516  *		uap->xsecurity			The user extended security
5517  *						buffer, or 0 if none
5518  *		uap->xsecurity_size		The size of xsecurity, or 0
5519  *
5520  * Returns:	0				Success
5521  *		!0				Errno (see fstat)
5522  */
5523 int
sys_fstat64_extended(proc_t p,struct fstat64_extended_args * uap,__unused int32_t * retval)5524 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
5525 {
5526 	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
5527 }
5528 
5529 
5530 /*
5531  * sys_fstat64
5532  *
5533  * Description:	Get 64 bit version of the file status for the file associated
5534  *		with fd
5535  *
5536  * Parameters:	p				The process doing the fstat
5537  *		uap->fd				The fd to stat
5538  *		uap->ub				The user stat buffer
5539  *
5540  * Returns:	0				Success
5541  *		!0				Errno (see fstat)
5542  */
5543 int
sys_fstat64(proc_t p,struct fstat64_args * uap,__unused int32_t * retval)5544 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
5545 {
5546 	return fstat(p, uap->fd, uap->ub, 0, 0, 1);
5547 }
5548 
5549 
5550 /*
5551  * sys_fpathconf
5552  *
5553  * Description:	Return pathconf information about a file descriptor.
5554  *
5555  * Parameters:	p				Process making the request
5556  *		uap->fd				fd to get information about
5557  *		uap->name			Name of information desired
5558  *		retval				Pointer to the call return area
5559  *
5560  * Returns:	0				Success
5561  *		EINVAL
5562  *	fp_lookup:EBADF				Bad file descriptor
5563  *	vnode_getwithref:???
5564  *	vn_pathconf:???
5565  *
5566  * Implicit returns:
5567  *		*retval (modified)		Returned information (numeric)
5568  */
5569 int
sys_fpathconf(proc_t p,struct fpathconf_args * uap,int32_t * retval)5570 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5571 {
5572 	int fd = uap->fd;
5573 	struct fileproc *fp;
5574 	struct vnode *vp;
5575 	int error = 0;
5576 	file_type_t type;
5577 
5578 
5579 	AUDIT_ARG(fd, uap->fd);
5580 	if ((error = fp_lookup(p, fd, &fp, 0))) {
5581 		return error;
5582 	}
5583 	type = fp->f_type;
5584 
5585 	switch (type) {
5586 	case DTYPE_SOCKET:
5587 		if (uap->name != _PC_PIPE_BUF) {
5588 			error = EINVAL;
5589 			goto out;
5590 		}
5591 		*retval = PIPE_BUF;
5592 		error = 0;
5593 		goto out;
5594 
5595 	case DTYPE_PIPE:
5596 		if (uap->name != _PC_PIPE_BUF) {
5597 			error = EINVAL;
5598 			goto out;
5599 		}
5600 		*retval = PIPE_BUF;
5601 		error = 0;
5602 		goto out;
5603 
5604 	case DTYPE_VNODE:
5605 		vp = (struct vnode *)fp_get_data(fp);
5606 
5607 		if ((error = vnode_getwithref(vp)) == 0) {
5608 			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5609 
5610 			error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5611 
5612 			(void)vnode_put(vp);
5613 		}
5614 		goto out;
5615 
5616 	default:
5617 		error = EINVAL;
5618 		goto out;
5619 	}
5620 	/*NOTREACHED*/
5621 out:
5622 	fp_drop(p, fd, fp, 0);
5623 	return error;
5624 }
5625 
5626 /*
5627  * sys_flock
5628  *
5629  * Description:	Apply an advisory lock on a file descriptor.
5630  *
5631  * Parameters:	p				Process making request
5632  *		uap->fd				fd on which the lock is to be
5633  *						attempted
5634  *		uap->how			(Un)Lock bits, including type
5635  *		retval				Pointer to the call return area
5636  *
5637  * Returns:	0				Success
5638  *	fp_getfvp:EBADF				Bad file descriptor
5639  *	fp_getfvp:ENOTSUP			fd does not refer to a vnode
5640  *	vnode_getwithref:???
5641  *	VNOP_ADVLOCK:???
5642  *
5643  * Implicit returns:
5644  *		*retval (modified)		Size of dtable
5645  *
5646  * Notes:	Just attempt to get a record lock of the requested type on
5647  *		the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5648  */
5649 int
sys_flock(proc_t p,struct flock_args * uap,__unused int32_t * retval)5650 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5651 {
5652 	int fd = uap->fd;
5653 	int how = uap->how;
5654 	struct fileproc *fp;
5655 	struct vnode *vp;
5656 	struct flock lf;
5657 	vfs_context_t ctx = vfs_context_current();
5658 	int error = 0;
5659 
5660 	AUDIT_ARG(fd, uap->fd);
5661 	if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5662 		return error;
5663 	}
5664 	if ((error = vnode_getwithref(vp))) {
5665 		goto out1;
5666 	}
5667 	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5668 
5669 	lf.l_whence = SEEK_SET;
5670 	lf.l_start = 0;
5671 	lf.l_len = 0;
5672 	if (how & LOCK_UN) {
5673 		lf.l_type = F_UNLCK;
5674 		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5675 		goto out;
5676 	}
5677 	if (how & LOCK_EX) {
5678 		lf.l_type = F_WRLCK;
5679 	} else if (how & LOCK_SH) {
5680 		lf.l_type = F_RDLCK;
5681 	} else {
5682 		error = EBADF;
5683 		goto out;
5684 	}
5685 #if CONFIG_MACF
5686 	error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
5687 	if (error) {
5688 		goto out;
5689 	}
5690 #endif
5691 	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5692 	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5693 	    ctx, NULL);
5694 	if (!error) {
5695 		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5696 	}
5697 out:
5698 	(void)vnode_put(vp);
5699 out1:
5700 	fp_drop(p, fd, fp, 0);
5701 	return error;
5702 }
5703 
5704 /*
5705  * sys_fileport_makeport
5706  *
5707  * Description: Obtain a Mach send right for a given file descriptor.
5708  *
5709  * Parameters:	p		Process calling fileport
5710  *              uap->fd		The fd to reference
5711  *              uap->portnamep  User address at which to place port name.
5712  *
5713  * Returns:	0		Success.
5714  *              EBADF		Bad file descriptor.
5715  *              EINVAL		File descriptor had type that cannot be sent, misc. other errors.
5716  *              EFAULT		Address at which to store port name is not valid.
5717  *              EAGAIN		Resource shortage.
5718  *
5719  * Implicit returns:
5720  *		On success, name of send right is stored at user-specified address.
5721  */
5722 int
sys_fileport_makeport(proc_t p,struct fileport_makeport_args * uap,__unused int * retval)5723 sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5724     __unused int *retval)
5725 {
5726 	int err;
5727 	int fd = uap->fd;
5728 	user_addr_t user_portaddr = uap->portnamep;
5729 	struct fileproc *fp = FILEPROC_NULL;
5730 	struct fileglob *fg = NULL;
5731 	ipc_port_t fileport;
5732 	mach_port_name_t name = MACH_PORT_NULL;
5733 
5734 	proc_fdlock(p);
5735 	err = fp_lookup(p, fd, &fp, 1);
5736 	if (err != 0) {
5737 		goto out_unlock;
5738 	}
5739 
5740 	fg = fp->fp_glob;
5741 	if (!fg_sendable(fg)) {
5742 		err = EINVAL;
5743 		goto out_unlock;
5744 	}
5745 
5746 	if (fp_isguarded(fp, GUARD_FILEPORT)) {
5747 		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5748 		goto out_unlock;
5749 	}
5750 
5751 	/* Dropped when port is deallocated */
5752 	fg_ref(p, fg);
5753 
5754 	proc_fdunlock(p);
5755 
5756 	/* Allocate and initialize a port */
5757 	fileport = fileport_alloc(fg);
5758 	if (fileport == IPC_PORT_NULL) {
5759 		fg_drop_live(fg);
5760 		err = EAGAIN;
5761 		goto out;
5762 	}
5763 
5764 	/* Add an entry.  Deallocates port on failure. */
5765 	name = ipc_port_copyout_send(fileport, get_task_ipcspace(proc_task(p)));
5766 	if (!MACH_PORT_VALID(name)) {
5767 		err = EINVAL;
5768 		goto out;
5769 	}
5770 
5771 	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5772 	if (err != 0) {
5773 		goto out;
5774 	}
5775 
5776 	/* Tag the fileglob for debugging purposes */
5777 	lck_mtx_lock_spin(&fg->fg_lock);
5778 	fg->fg_lflags |= FG_PORTMADE;
5779 	lck_mtx_unlock(&fg->fg_lock);
5780 
5781 	fp_drop(p, fd, fp, 0);
5782 
5783 	return 0;
5784 
5785 out_unlock:
5786 	proc_fdunlock(p);
5787 out:
5788 	if (MACH_PORT_VALID(name)) {
5789 		/* Don't care if another thread races us to deallocate the entry */
5790 		(void) mach_port_deallocate(get_task_ipcspace(proc_task(p)), name);
5791 	}
5792 
5793 	if (fp != FILEPROC_NULL) {
5794 		fp_drop(p, fd, fp, 0);
5795 	}
5796 
5797 	return err;
5798 }
5799 
5800 void
fileport_releasefg(struct fileglob * fg)5801 fileport_releasefg(struct fileglob *fg)
5802 {
5803 	(void)fg_drop(PROC_NULL, fg);
5804 }
5805 
5806 /*
5807  * fileport_makefd
5808  *
5809  * Description: Obtain the file descriptor for a given Mach send right.
5810  *
5811  * Returns:	0		Success
5812  *		EINVAL		Invalid Mach port name, or port is not for a file.
5813  *	fdalloc:EMFILE
5814  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5815  *
5816  * Implicit returns:
5817  *		*retval (modified)		The new descriptor
5818  */
5819 int
fileport_makefd(proc_t p,ipc_port_t port,fileproc_flags_t fp_flags,int * retval)5820 fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
5821 {
5822 	struct fileglob *fg;
5823 	struct fileproc *fp = FILEPROC_NULL;
5824 	int fd;
5825 	int err;
5826 
5827 	fg = fileport_port_to_fileglob(port);
5828 	if (fg == NULL) {
5829 		err = EINVAL;
5830 		goto out;
5831 	}
5832 
5833 	fp = fileproc_alloc_init();
5834 
5835 	proc_fdlock(p);
5836 	err = fdalloc(p, 0, &fd);
5837 	if (err != 0) {
5838 		proc_fdunlock(p);
5839 		goto out;
5840 	}
5841 	if (fp_flags) {
5842 		fp->fp_flags |= fp_flags;
5843 	}
5844 
5845 	fp->fp_glob = fg;
5846 	fg_ref(p, fg);
5847 
5848 	procfdtbl_releasefd(p, fd, fp);
5849 	proc_fdunlock(p);
5850 
5851 	*retval = fd;
5852 	err = 0;
5853 out:
5854 	if ((fp != NULL) && (0 != err)) {
5855 		fileproc_free(fp);
5856 	}
5857 
5858 	return err;
5859 }
5860 
5861 /*
5862  * sys_fileport_makefd
5863  *
5864  * Description: Obtain the file descriptor for a given Mach send right.
5865  *
5866  * Parameters:	p		Process calling fileport
5867  *              uap->port	Name of send right to file port.
5868  *
5869  * Returns:	0		Success
5870  *		EINVAL		Invalid Mach port name, or port is not for a file.
5871  *	fdalloc:EMFILE
5872  *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
5873  *
5874  * Implicit returns:
5875  *		*retval (modified)		The new descriptor
5876  */
5877 int
sys_fileport_makefd(proc_t p,struct fileport_makefd_args * uap,int32_t * retval)5878 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5879 {
5880 	ipc_port_t port = IPC_PORT_NULL;
5881 	mach_port_name_t send = uap->port;
5882 	kern_return_t res;
5883 	int err;
5884 
5885 	res = ipc_object_copyin(get_task_ipcspace(proc_task(p)),
5886 	    send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5887 
5888 	if (res == KERN_SUCCESS) {
5889 		err = fileport_makefd(p, port, FP_CLOEXEC, retval);
5890 	} else {
5891 		err = EINVAL;
5892 	}
5893 
5894 	if (IPC_PORT_NULL != port) {
5895 		ipc_port_release_send(port);
5896 	}
5897 
5898 	return err;
5899 }
5900 
5901 
5902 #pragma mark fileops wrappers
5903 
5904 /*
5905  * fo_read
5906  *
5907  * Description:	Generic fileops read indirected through the fileops pointer
5908  *		in the fileproc structure
5909  *
5910  * Parameters:	fp				fileproc structure pointer
5911  *		uio				user I/O structure pointer
5912  *		flags				FOF_ flags
5913  *		ctx				VFS context for operation
5914  *
5915  * Returns:	0				Success
5916  *		!0				Errno from read
5917  */
5918 int
fo_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5919 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5920 {
5921 	return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5922 }
5923 
5924 int
fo_no_read(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5925 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5926 {
5927 #pragma unused(fp, uio, flags, ctx)
5928 	return ENXIO;
5929 }
5930 
5931 
5932 /*
5933  * fo_write
5934  *
5935  * Description:	Generic fileops write indirected through the fileops pointer
5936  *		in the fileproc structure
5937  *
5938  * Parameters:	fp				fileproc structure pointer
5939  *		uio				user I/O structure pointer
5940  *		flags				FOF_ flags
5941  *		ctx				VFS context for operation
5942  *
5943  * Returns:	0				Success
5944  *		!0				Errno from write
5945  */
5946 int
fo_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5947 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5948 {
5949 	return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5950 }
5951 
5952 int
fo_no_write(struct fileproc * fp,struct uio * uio,int flags,vfs_context_t ctx)5953 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5954 {
5955 #pragma unused(fp, uio, flags, ctx)
5956 	return ENXIO;
5957 }
5958 
5959 
5960 /*
5961  * fo_ioctl
5962  *
5963  * Description:	Generic fileops ioctl indirected through the fileops pointer
5964  *		in the fileproc structure
5965  *
5966  * Parameters:	fp				fileproc structure pointer
5967  *		com				ioctl command
5968  *		data				pointer to internalized copy
5969  *						of user space ioctl command
5970  *						parameter data in kernel space
5971  *		ctx				VFS context for operation
5972  *
5973  * Returns:	0				Success
5974  *		!0				Errno from ioctl
5975  *
5976  * Locks:	The caller is assumed to have held the proc_fdlock; this
5977  *		function releases and reacquires this lock.  If the caller
5978  *		accesses data protected by this lock prior to calling this
5979  *		function, it will need to revalidate/reacquire any cached
5980  *		protected data obtained prior to the call.
5981  */
5982 int
fo_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5983 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5984 {
5985 	int error;
5986 
5987 	proc_fdunlock(vfs_context_proc(ctx));
5988 	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5989 	proc_fdlock(vfs_context_proc(ctx));
5990 	return error;
5991 }
5992 
5993 int
fo_no_ioctl(struct fileproc * fp,u_long com,caddr_t data,vfs_context_t ctx)5994 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5995 {
5996 #pragma unused(fp, com, data, ctx)
5997 	return ENOTTY;
5998 }
5999 
6000 
6001 /*
6002  * fo_select
6003  *
6004  * Description:	Generic fileops select indirected through the fileops pointer
6005  *		in the fileproc structure
6006  *
6007  * Parameters:	fp				fileproc structure pointer
6008  *		which				select which
6009  *		wql				pointer to wait queue list
6010  *		ctx				VFS context for operation
6011  *
6012  * Returns:	0				Success
6013  *		!0				Errno from select
6014  */
6015 int
fo_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)6016 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6017 {
6018 	return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
6019 }
6020 
6021 int
fo_no_select(struct fileproc * fp,int which,void * wql,vfs_context_t ctx)6022 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6023 {
6024 #pragma unused(fp, which, wql, ctx)
6025 	return ENOTSUP;
6026 }
6027 
6028 
6029 /*
6030  * fo_close
6031  *
6032  * Description:	Generic fileops close indirected through the fileops pointer
6033  *		in the fileproc structure
6034  *
6035  * Parameters:	fp				fileproc structure pointer for
6036  *						file to close
6037  *		ctx				VFS context for operation
6038  *
6039  * Returns:	0				Success
6040  *		!0				Errno from close
6041  */
6042 int
fo_close(struct fileglob * fg,vfs_context_t ctx)6043 fo_close(struct fileglob *fg, vfs_context_t ctx)
6044 {
6045 	return (*fg->fg_ops->fo_close)(fg, ctx);
6046 }
6047 
6048 
6049 /*
6050  * fo_drain
6051  *
6052  * Description:	Generic fileops kqueue filter indirected through the fileops
6053  *		pointer in the fileproc structure
6054  *
6055  * Parameters:	fp				fileproc structure pointer
6056  *		ctx				VFS context for operation
6057  *
6058  * Returns:	0				Success
6059  *		!0				errno from drain
6060  */
6061 int
fo_drain(struct fileproc * fp,vfs_context_t ctx)6062 fo_drain(struct fileproc *fp, vfs_context_t ctx)
6063 {
6064 	return (*fp->f_ops->fo_drain)(fp, ctx);
6065 }
6066 
6067 int
fo_no_drain(struct fileproc * fp,vfs_context_t ctx)6068 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
6069 {
6070 #pragma unused(fp, ctx)
6071 	return ENOTSUP;
6072 }
6073 
6074 
6075 /*
6076  * fo_kqfilter
6077  *
6078  * Description:	Generic fileops kqueue filter indirected through the fileops
6079  *		pointer in the fileproc structure
6080  *
6081  * Parameters:	fp				fileproc structure pointer
6082  *		kn				pointer to knote to filter on
6083  *
6084  * Returns:	(kn->kn_flags & EV_ERROR)	error in kn->kn_data
6085  *		0				Filter is not active
6086  *		!0				Filter is active
6087  */
6088 int
fo_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)6089 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6090 {
6091 	return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
6092 }
6093 
6094 int
fo_no_kqfilter(struct fileproc * fp,struct knote * kn,struct kevent_qos_s * kev)6095 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6096 {
6097 #pragma unused(fp, kev)
6098 	knote_set_error(kn, ENOTSUP);
6099 	return 0;
6100 }
6101