xref: /xnu-11417.101.15/bsd/vfs/vfs_lookup.c (revision e3723e1f17661b24996789d8afc084c0c3303b26)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)vfs_lookup.c	8.10 (Berkeley) 5/27/95
67  */
68 /*
69  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70  * support for mandatory and extensible security protections.  This notice
71  * is included in support of clause 2.2 (b) of the Apple Public License,
72  * Version 2.0.
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/syslimits.h>
78 #include <sys/time.h>
79 #include <sys/namei.h>
80 #include <sys/vm.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/errno.h>
84 #include <kern/kalloc.h>
85 #include <sys/filedesc.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kdebug.h>
88 #include <sys/unistd.h>         /* For _PC_NAME_MAX */
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/zalloc.h>
92 #include <security/audit/audit.h>
93 #if CONFIG_MACF
94 #include <security/mac_framework.h>
95 #endif
96 #include <os/atomic_private.h>
97 
98 #include <sys/paths.h>
99 
100 #if NAMEDRSRCFORK
101 #include <sys/xattr.h>
102 #endif
103 /*
104  * The minimum volfs-style pathname is 9.
105  * Example:  "/.vol/1/2"
106  */
107 #define VOLFS_MIN_PATH_LEN  9
108 
109 
110 #if CONFIG_VOLFS
111 static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx);
112 #define MAX_VOLFS_RESTARTS 5
113 #endif
114 
115 static int              lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
116 static int              lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, bool* dp_has_iocount, vfs_context_t ctx);
117 static int              lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
118 static void             lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
119 static int              lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
120     int vbusyflags, int *keep_going, int nc_generation,
121     int wantparent, int atroot, vfs_context_t ctx);
122 static int              lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
123 
124 #if NAMEDRSRCFORK
125 static int              lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
126 #endif
127 
128 extern lck_rw_t rootvnode_rw_lock;
129 
130 #define RESOLVE_NOFOLLOW_ANY  0x00000001
131 #define RESOLVE_CHECKED       0x80000000
132 static int              lookup_check_for_resolve_prefix(char *path, size_t pathbuflen, size_t len, uint32_t *resolve_flags, size_t *prefix_len);
133 
134 /*
135  * Convert a pathname into a pointer to a locked inode.
136  *
137  * The FOLLOW flag is set when symbolic links are to be followed
138  * when they occur at the end of the name translation process.
139  * Symbolic links are always followed for all other pathname
140  * components other than the last.
141  *
142  * The segflg defines whether the name is to be copied from user
143  * space or kernel space.
144  *
145  * Overall outline of namei:
146  *
147  *	copy in name
148  *	get starting directory
149  *	while (!done && !error) {
150  *		call lookup to search path.
151  *		if symbolic link, massage name in buffer and continue
152  *	}
153  *
154  * Returns:	0			Success
155  *		ENOENT			No such file or directory
156  *		ELOOP			Too many levels of symbolic links
157  *		ENAMETOOLONG		Filename too long
158  *		copyinstr:EFAULT	Bad address
159  *		copyinstr:ENAMETOOLONG	Filename too long
160  *		lookup:EBADF		Bad file descriptor
161  *		lookup:EROFS
162  *		lookup:EACCES
163  *		lookup:EPERM
164  *		lookup:ERECYCLE	 vnode was recycled from underneath us in lookup.
165  *						 This means we should re-drive lookup from this point.
166  *		lookup: ???
167  *		VNOP_READLINK:???
168  */
169 int
namei(struct nameidata * ndp)170 namei(struct nameidata *ndp)
171 {
172 	struct vnode *dp;       /* the directory we are searching */
173 	struct vnode *usedvp = ndp->ni_dvp;  /* store pointer to vp in case we must loop due to
174 	                                      *                                          heavy vnode pressure */
175 	uint32_t cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
176 	int error;
177 	struct componentname *cnp = &ndp->ni_cnd;
178 	vfs_context_t ctx = cnp->cn_context;
179 	proc_t p = vfs_context_proc(ctx);
180 #if CONFIG_AUDIT
181 /* XXX ut should be from context */
182 	uthread_t ut = current_uthread();
183 #endif
184 
185 #if CONFIG_VOLFS
186 	int volfs_restarts = 0;
187 #endif
188 	size_t bytes_copied = 0;
189 	size_t resolve_prefix_len = 0;
190 	vnode_t rootdir_with_usecount = NULLVP;
191 	vnode_t startdir_with_usecount = NULLVP;
192 	vnode_t usedvp_dp = NULLVP;
193 	int32_t old_count = 0;
194 	uint32_t resolve_flags = 0;
195 	int resolve_error = 0;
196 	bool dp_has_iocount = false;
197 	bool clear_usedvp = false;
198 
199 #if DIAGNOSTIC
200 	if (!vfs_context_ucred(ctx) || !p) {
201 		panic("namei: bad cred/proc");
202 	}
203 	if (cnp->cn_nameiop & (~OPMASK)) {
204 		panic("namei: nameiop contaminated with flags");
205 	}
206 	if (cnp->cn_flags & OPMASK) {
207 		panic("namei: flags contaminated with nameiops");
208 	}
209 #endif
210 
211 	/*
212 	 * A compound VNOP found something that needs further processing:
213 	 * either a trigger vnode, a covered directory, or a symlink.
214 	 */
215 	if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
216 		int rdonly, vbusyflags, keep_going, wantparent;
217 
218 		rdonly = cnp->cn_flags & RDONLY;
219 		vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
220 		keep_going = 0;
221 		wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
222 
223 		ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);
224 
225 		error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags,
226 		    &keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
227 		if (error) {
228 			goto out_drop;
229 		}
230 		if (keep_going) {
231 			if ((cnp->cn_flags & ISSYMLINK) == 0) {
232 				panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
233 			}
234 			goto continue_symlink;
235 		}
236 
237 		return 0;
238 	}
239 
240 vnode_recycled:
241 
242 	/*
243 	 * Get a buffer for the name to be translated, and copy the
244 	 * name into the buffer.
245 	 */
246 	if ((cnp->cn_flags & HASBUF) == 0) {
247 		cnp->cn_pnbuf = ndp->ni_pathbuf;
248 		cnp->cn_pnlen = PATHBUFLEN;
249 	}
250 
251 retry_copy:
252 	if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
253 		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
254 		    cnp->cn_pnlen, &bytes_copied);
255 	} else {
256 		error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
257 		    cnp->cn_pnlen, &bytes_copied);
258 	}
259 	if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
260 		if (bytes_copied == PATHBUFLEN) {
261 			resolve_error = lookup_check_for_resolve_prefix(cnp->cn_pnbuf, PATHBUFLEN,
262 			    PATHBUFLEN, &resolve_flags, &resolve_prefix_len);
263 			/* errors from copyinstr take precedence over resolve_error */
264 			if (!resolve_error && resolve_prefix_len) {
265 				ndp->ni_dirp += resolve_prefix_len;
266 				resolve_prefix_len = 0;
267 			}
268 		}
269 
270 		cnp->cn_pnbuf = zalloc(ZV_NAMEI);
271 		cnp->cn_flags |= HASBUF;
272 		cnp->cn_pnlen = MAXPATHLEN;
273 		bytes_copied = 0;
274 
275 		goto retry_copy;
276 	} else if (error == ENAMETOOLONG && (cnp->cn_flags & HASBUF) &&
277 	    (cnp->cn_pnlen * 2) <= MAXLONGPATHLEN && proc_support_long_paths(p)) {
278 		if (cnp->cn_pnlen == MAXPATHLEN) {
279 			/* First time we arrive here, the buffer came from ZV_NAMEI */
280 			zfree(ZV_NAMEI, cnp->cn_pnbuf);
281 		} else {
282 			kfree_data(cnp->cn_pnbuf, cnp->cn_pnlen);
283 		}
284 
285 		resolve_error = 0;
286 
287 		cnp->cn_pnlen *= 2;
288 		cnp->cn_pnbuf = kalloc_data(cnp->cn_pnlen, Z_WAITOK | Z_ZERO | Z_NOFAIL);
289 		bytes_copied = 0;
290 
291 		goto retry_copy;
292 	}
293 	if (error) {
294 		goto error_out;
295 	} else if (resolve_error) {
296 		error = resolve_error;
297 		goto error_out;
298 	}
299 	assert(bytes_copied <= cnp->cn_pnlen);
300 	ndp->ni_pathlen = (u_int)bytes_copied;
301 	bytes_copied = 0;
302 
303 	if (!(resolve_flags & RESOLVE_CHECKED)) {
304 		assert(!(cnp->cn_flags & HASBUF) && (cnp->cn_pnlen == PATHBUFLEN));
305 		error = lookup_check_for_resolve_prefix(cnp->cn_pnbuf, cnp->cn_pnlen, ndp->ni_pathlen,
306 		    &resolve_flags, &resolve_prefix_len);
307 		if (error) {
308 			goto error_out;
309 		}
310 		if (resolve_prefix_len) {
311 			/*
312 			 * Since this is pointing to the static path buffer instead of a zalloc'ed memorry,
313 			 * we're not going to attempt to free this, so it is perfectly fine to change the
314 			 * value of cnp->cn_pnbuf.
315 			 */
316 			cnp->cn_pnbuf += resolve_prefix_len;
317 			cnp->cn_pnlen -= resolve_prefix_len;
318 			ndp->ni_pathlen -= resolve_prefix_len;
319 			resolve_prefix_len = 0;
320 		}
321 	}
322 
323 	/* At this point we should have stripped off the prefix from the path that has to be looked up */
324 	assert((resolve_flags & RESOLVE_CHECKED) && (resolve_prefix_len == 0));
325 
326 	/*
327 	 * Since the name cache may contain positive entries of
328 	 * the incorrect case, force lookup() to bypass the cache
329 	 * and call directly into the filesystem for each path
330 	 * component. Note: the FS may still consult the cache,
331 	 * but can apply rules to validate the results.
332 	 */
333 	if (proc_is_forcing_hfs_case_sensitivity(p)) {
334 		cnp->cn_flags |= CN_SKIPNAMECACHE;
335 	}
336 
337 #if CONFIG_VOLFS
338 	/*
339 	 * Check for legacy volfs style pathnames.
340 	 *
341 	 * For compatibility reasons we currently allow these paths,
342 	 * but future versions of the OS may not support them.
343 	 */
344 	if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
345 	    cnp->cn_pnbuf[0] == '/' &&
346 	    cnp->cn_pnbuf[1] == '.' &&
347 	    cnp->cn_pnbuf[2] == 'v' &&
348 	    cnp->cn_pnbuf[3] == 'o' &&
349 	    cnp->cn_pnbuf[4] == 'l' &&
350 	    cnp->cn_pnbuf[5] == '/') {
351 		char * realpath;
352 		size_t realpathlen;
353 		int realpath_err;
354 		/* Attempt to resolve a legacy volfs style pathname. */
355 
356 		realpathlen = MAXPATHLEN;
357 		do {
358 			if (realpathlen == MAXPATHLEN) {
359 				realpath = zalloc(ZV_NAMEI);
360 			} else {
361 				/*
362 				 * To be consistent with the behavior of openbyid_np, which always supports
363 				 * long paths, do not gate our support on proc_support_long_paths either.
364 				 */
365 				realpath = kalloc_data(realpathlen, Z_WAITOK | Z_ZERO | Z_NOFAIL);
366 			}
367 			/*
368 			 * We only error out on the ENAMETOOLONG cases where we know that
369 			 * vfs_getrealpath translation succeeded but the path could not fit into
370 			 * realpathlen characters.  In other failure cases, we may be dealing with a path
371 			 * that legitimately looks like /.vol/1234/567 and is not meant to be translated
372 			 */
373 			if ((realpath_err = vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, realpathlen, ctx))) {
374 				if (realpathlen == MAXPATHLEN) {
375 					zfree(ZV_NAMEI, realpath);
376 				} else {
377 					kfree_data(realpath, realpathlen);
378 				}
379 				if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG) {
380 					error = ENAMETOOLONG;
381 				}
382 			} else {
383 				size_t tmp_len;
384 				if (cnp->cn_flags & HASBUF) {
385 					if (cnp->cn_pnlen == MAXPATHLEN) {
386 						zfree(ZV_NAMEI, cnp->cn_pnbuf);
387 					} else {
388 						kfree_data(cnp->cn_pnbuf, cnp->cn_pnlen);
389 					}
390 				}
391 				cnp->cn_pnbuf = realpath;
392 				cnp->cn_pnlen = (int)realpathlen;
393 				tmp_len = strlen(realpath) + 1;
394 				assert(tmp_len <= UINT_MAX);
395 				ndp->ni_pathlen = (u_int)tmp_len;
396 				cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
397 				error = 0;
398 			}
399 		} while (error == ENAMETOOLONG && (realpathlen *= 2) && realpathlen <= MAXLONGPATHLEN);
400 
401 		if (error) {
402 			goto error_out;
403 		}
404 	}
405 #endif /* CONFIG_VOLFS */
406 
407 #if CONFIG_AUDIT
408 	/* If we are auditing the kernel pathname, save the user pathname */
409 	if (cnp->cn_flags & AUDITVNPATH1) {
410 		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1);
411 	}
412 	if (cnp->cn_flags & AUDITVNPATH2) {
413 		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2);
414 	}
415 #endif /* CONFIG_AUDIT */
416 
417 	/*
418 	 * Do not allow empty pathnames
419 	 */
420 	if (*cnp->cn_pnbuf == '\0') {
421 		error = ENOENT;
422 		goto error_out;
423 	}
424 	if (ndp->ni_flag & NAMEI_NOFOLLOW_ANY || (resolve_flags & RESOLVE_NOFOLLOW_ANY)) {
425 		ndp->ni_loopcnt = MAXSYMLINKS;
426 	} else {
427 		ndp->ni_loopcnt = 0;
428 	}
429 
430 	/*
431 	 * determine the starting point for the translation.
432 	 */
433 	proc_dirs_lock_shared(p);
434 	lck_rw_lock_shared(&rootvnode_rw_lock);
435 
436 	if (!(ndp->ni_flag & NAMEI_ROOTDIR)) {
437 		if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
438 			ndp->ni_rootdir = p->p_fd.fd_rdir;
439 		} else {
440 			ndp->ni_rootdir = rootvnode;
441 		}
442 	}
443 
444 	if (!ndp->ni_rootdir) {
445 		if (ndp->ni_flag & NAMEI_ROOTDIR) {
446 			panic("NAMEI_ROOTDIR is set but ni_rootdir is not\n");
447 		} else if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
448 			/* This should be a panic */
449 			printf("p->p_fd.fd_rdir is not set\n");
450 		} else {
451 			printf("rootvnode is not set\n");
452 		}
453 		lck_rw_unlock_shared(&rootvnode_rw_lock);
454 		proc_dirs_unlock_shared(p);
455 		error = ENOENT;
456 		goto error_out;
457 	}
458 
459 	cnp->cn_nameptr = cnp->cn_pnbuf;
460 
461 	ndp->ni_usedvp = NULLVP;
462 
463 	if (*(cnp->cn_nameptr) == '/') {
464 		while (*(cnp->cn_nameptr) == '/') {
465 			cnp->cn_nameptr++;
466 			ndp->ni_pathlen--;
467 		}
468 		if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
469 			/* Absolute paths are never allowed in NAMEI_RESOLVE_BENEATH */
470 			lck_rw_unlock_shared(&rootvnode_rw_lock);
471 			proc_dirs_unlock_shared(p);
472 			error = EACCES;
473 			goto error_out;
474 		}
475 		dp = ndp->ni_rootdir;
476 	} else if (cnp->cn_flags & USEDVP) {
477 		dp = ndp->ni_dvp;
478 		ndp->ni_usedvp = dp;
479 		usedvp_dp = dp;
480 	} else {
481 		dp = vfs_context_cwd(ctx);
482 		if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
483 			/* Store the starting directory because it can change after a symlink traversal */
484 			ndp->ni_usedvp = dp;
485 			clear_usedvp = true;
486 		}
487 	}
488 
489 	if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
490 		dp = NULLVP;
491 		lck_rw_unlock_shared(&rootvnode_rw_lock);
492 		proc_dirs_unlock_shared(p);
493 		error = ENOENT;
494 		goto error_out;
495 	}
496 
497 	/*
498 	 * We need our own usecount on the root vnode and the starting dir across
499 	 * the lookup. There's two things that be done here. We can hold the locks
500 	 * (which protect the existing usecounts on the directories) across the
501 	 * lookup or take our own usecount. Holding the locks across the lookup can
502 	 * cause deadlock issues if we re-enter namei on the same thread so the
503 	 * correct thing to do is to acquire our own usecount.
504 	 *
505 	 * Ideally, the usecount should be obtained by vnode_get->vnode_ref->vnode_put.
506 	 * However when this vnode is the rootvnode, that sequence will produce a
507 	 * lot of vnode mutex locks and  unlocks on a single vnode (the rootvnode)
508 	 * and will be highly contended and degrade performance. Since we have
509 	 * an existing usecount protected by the locks we hold, we'll just use
510 	 * an atomic op to increment the usecount on a vnode which already has one
511 	 * and can't be released because we have the locks which protect against that
512 	 * happening.
513 	 */
514 	rootdir_with_usecount = ndp->ni_rootdir;
515 	old_count = os_atomic_inc_orig(&rootdir_with_usecount->v_usecount, relaxed);
516 	if (old_count < 1) {
517 		panic("(1) invalid pre-increment usecount (%d) for rootdir vnode %p",
518 		    old_count, rootdir_with_usecount);
519 	} else if (old_count == INT32_MAX) {
520 		panic("(1) usecount overflow for vnode %p", rootdir_with_usecount);
521 	}
522 
523 	if ((dp != rootdir_with_usecount) && (dp != usedvp_dp)) {
524 		old_count = os_atomic_inc_orig(&dp->v_usecount, relaxed);
525 		if (old_count < 1) {
526 			panic("(2) invalid pre-increment usecount (%d) for vnode %p", old_count, dp);
527 		} else if (old_count == INT32_MAX) {
528 			panic("(2) usecount overflow for vnode %p", dp);
529 		}
530 		startdir_with_usecount = dp;
531 	}
532 
533 	/* Now that we have our usecount, release the locks */
534 	lck_rw_unlock_shared(&rootvnode_rw_lock);
535 	proc_dirs_unlock_shared(p);
536 
537 	ndp->ni_dvp = NULLVP;
538 	ndp->ni_vp  = NULLVP;
539 
540 	for (;;) {
541 #if CONFIG_MACF
542 		/*
543 		 * Give MACF policies a chance to reject the lookup
544 		 * before performing any filesystem operations.
545 		 * This hook is called before resolving the path and
546 		 * again each time a symlink is encountered.
547 		 * NB: policies receive path information as supplied
548 		 *     by the caller and thus cannot be trusted.
549 		 */
550 		error = mac_vnode_check_lookup_preflight(ctx, dp, cnp->cn_nameptr, cnp->cn_namelen);
551 		if (error) {
552 			goto error_out;
553 		}
554 #endif
555 		ndp->ni_startdir = dp;
556 		dp = NULLVP;
557 
558 		if ((error = lookup(ndp))) {
559 			goto error_out;
560 		}
561 
562 		/*
563 		 * Check for symbolic link
564 		 */
565 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
566 			if (startdir_with_usecount) {
567 				vnode_rele(startdir_with_usecount);
568 				startdir_with_usecount = NULLVP;
569 			}
570 			if (rootdir_with_usecount) {
571 				lck_rw_lock_shared(&rootvnode_rw_lock);
572 				if (rootdir_with_usecount == rootvnode) {
573 					old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
574 					if (old_count < 2) {
575 						/*
576 						 * There needs to have been at least 1 usecount left on the rootvnode
577 						 */
578 						panic("(3) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
579 						    old_count, rootdir_with_usecount);
580 					}
581 					rootdir_with_usecount = NULLVP;
582 				}
583 				lck_rw_unlock_shared(&rootvnode_rw_lock);
584 				if (rootdir_with_usecount) {
585 					vnode_rele(rootdir_with_usecount);
586 					rootdir_with_usecount = NULLVP;
587 				}
588 			}
589 
590 			return 0;
591 		}
592 
593 continue_symlink:
594 		/* Gives us a new path to process, and a starting dir */
595 		error = lookup_handle_symlink(ndp, &dp, &dp_has_iocount, ctx);
596 		if (error != 0) {
597 			break;
598 		}
599 		if (dp_has_iocount) {
600 			if ((dp != rootdir_with_usecount) && (dp != startdir_with_usecount) &&
601 			    (dp != usedvp_dp)) {
602 				if (startdir_with_usecount) {
603 					vnode_rele(startdir_with_usecount);
604 				}
605 				vnode_ref_ext(dp, 0, VNODE_REF_FORCE);
606 				startdir_with_usecount = dp;
607 			}
608 			vnode_put(dp);
609 			dp_has_iocount = false;
610 		}
611 	}
612 	/*
613 	 * only come here if we fail to handle a SYMLINK...
614 	 * if either ni_dvp or ni_vp is non-NULL, then
615 	 * we need to drop the iocount that was picked
616 	 * up in the lookup routine
617 	 */
618 out_drop:
619 	if (ndp->ni_dvp) {
620 		vnode_put(ndp->ni_dvp);
621 	}
622 	if (ndp->ni_vp) {
623 		vnode_put(ndp->ni_vp);
624 	}
625 error_out:
626 	if (clear_usedvp) {
627 		ndp->ni_usedvp = NULLVP;
628 	}
629 	if (startdir_with_usecount) {
630 		vnode_rele(startdir_with_usecount);
631 		startdir_with_usecount = NULLVP;
632 	}
633 	if (rootdir_with_usecount) {
634 		lck_rw_lock_shared(&rootvnode_rw_lock);
635 		if (rootdir_with_usecount == rootvnode) {
636 			old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
637 			if (old_count < 2) {
638 				/*
639 				 * There needs to have been at least 1 usecount left on the rootvnode
640 				 */
641 				panic("(4) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
642 				    old_count, rootdir_with_usecount);
643 			}
644 			lck_rw_unlock_shared(&rootvnode_rw_lock);
645 		} else {
646 			lck_rw_unlock_shared(&rootvnode_rw_lock);
647 			vnode_rele(rootdir_with_usecount);
648 		}
649 		rootdir_with_usecount = NULLVP;
650 	}
651 
652 	if ((cnp->cn_flags & HASBUF)) {
653 		cnp->cn_flags &= ~HASBUF;
654 		if (cnp->cn_pnlen == MAXPATHLEN) {
655 			zfree(ZV_NAMEI, cnp->cn_pnbuf);
656 		} else {
657 			kfree_data(cnp->cn_pnbuf, cnp->cn_pnlen);
658 		}
659 	}
660 	cnp->cn_pnbuf = NULL;
661 	ndp->ni_vp = NULLVP;
662 	ndp->ni_dvp = NULLVP;
663 
664 #if CONFIG_VOLFS
665 	/*
666 	 * Deal with volfs fallout.
667 	 *
668 	 * At this point, if we were originally given a volfs path that
669 	 * looks like /.vol/123/456, then we would have had to convert it into
670 	 * a full path.  Assuming that part worked properly, we will now attempt
671 	 * to conduct a lookup of the item in the namespace.  Under normal
672 	 * circumstances, if a user looked up /tmp/foo and it was not there, it
673 	 * would be permissible to return ENOENT.
674 	 *
675 	 * However, we may not want to do that here.  Specifically, the volfs path
676 	 * uniquely identifies a certain item in the namespace regardless of where it
677 	 * lives.  If the item has moved in between the time we constructed the
678 	 * path and now, when we're trying to do a lookup/authorization on the full
679 	 * path, we may have gotten an ENOENT.
680 	 *
681 	 * At this point we can no longer tell if the path no longer exists
682 	 * or if the item in question no longer exists. It could have been renamed
683 	 * away, in which case the /.vol identifier is still valid.
684 	 *
685 	 * Do this dance a maximum of MAX_VOLFS_RESTARTS times.
686 	 */
687 	if ((error == ENOENT) && (ndp->ni_cnd.cn_flags & CN_VOLFSPATH)) {
688 		if (volfs_restarts < MAX_VOLFS_RESTARTS) {
689 			volfs_restarts++;
690 			goto vnode_recycled;
691 		}
692 	}
693 #endif
694 
695 	if (error == ERECYCLE) {
696 		/* vnode was recycled underneath us. re-drive lookup to start at
697 		 *  the beginning again, since recycling invalidated last lookup*/
698 		ndp->ni_cnd.cn_flags = cnpflags;
699 		ndp->ni_dvp = usedvp;
700 		goto vnode_recycled;
701 	}
702 
703 
704 	return error;
705 }
706 
707 int
namei_compound_available(vnode_t dp,struct nameidata * ndp)708 namei_compound_available(vnode_t dp, struct nameidata *ndp)
709 {
710 	if ((ndp->ni_flag & NAMEI_COMPOUNDOPEN) != 0) {
711 		return vnode_compound_open_available(dp);
712 	}
713 
714 	return 0;
715 }
716 
717 static int
lookup_check_for_resolve_prefix(char * path,size_t pathbuflen,size_t len,uint32_t * resolve_flags,size_t * prefix_len)718 lookup_check_for_resolve_prefix(char *path, size_t pathbuflen, size_t len, uint32_t *resolve_flags, size_t *prefix_len)
719 {
720 	int error = 0;
721 	*resolve_flags = (uint32_t)RESOLVE_CHECKED;
722 	*prefix_len = 0;
723 
724 	if (len < (sizeof("/.nofollow/") - 1) || path[0] != '/' || path[1] != '.') {
725 		return 0;
726 	}
727 
728 	if ((strncmp(&path[2], "nofollow/", (sizeof("nofollow/") - 1)) == 0)) {
729 		*resolve_flags |= RESOLVE_NOFOLLOW_ANY;
730 		*prefix_len = sizeof("/.nofollow") - 1;
731 	} else if ((len >= sizeof("/.resolve/1/") - 1) &&
732 	    strncmp(&path[2], "resolve/", (sizeof("resolve/") - 1)) == 0) {
733 		char * flag = path + (sizeof("/.resolve/") - 1);
734 		char *next = flag;
735 		char last_char = path[pathbuflen - 1];
736 
737 		/* no leading zeroes or non digits */
738 		if ((flag[0] == '0' && flag[1] != '/') ||
739 		    flag[0] < '0' || flag[0] > '9') {
740 			error = EINVAL;
741 			goto out;
742 		}
743 
744 		path[pathbuflen - 1] = '\0';
745 		unsigned long flag_val = strtoul(flag, &next, 10);
746 		path[pathbuflen - 1] = last_char;
747 		if (next[0] != '/' || (flag_val & ~(RESOLVE_NOFOLLOW_ANY))) {
748 			error = EINVAL;
749 			goto out;
750 		}
751 		assert(next >= flag);
752 		*resolve_flags |= (uint32_t)flag_val;
753 		*prefix_len = (size_t)(next - path);
754 	}
755 out:
756 	assert(*prefix_len <= sizeof("/.resolve/2147483647"));
757 	return error;
758 }
759 
760 static int
lookup_authorize_search(vnode_t dp,struct componentname * cnp,int dp_authorized_in_cache,vfs_context_t ctx)761 lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx)
762 {
763 #if !CONFIG_MACF
764 #pragma unused(cnp)
765 #endif
766 
767 	int error;
768 
769 	if (!dp_authorized_in_cache) {
770 		error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx);
771 		if (error) {
772 			return error;
773 		}
774 	}
775 #if CONFIG_MACF
776 	error = mac_vnode_check_lookup(ctx, dp, cnp);
777 	if (error) {
778 		return error;
779 	}
780 #endif /* CONFIG_MACF */
781 
782 	return 0;
783 }
784 
785 static void
lookup_consider_update_cache(vnode_t dvp,vnode_t vp,struct componentname * cnp,int nc_generation)786 lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation)
787 {
788 	int isdot_or_dotdot;
789 	isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT);
790 
791 	if (vp->v_name == NULL || vp->v_parent == NULLVP) {
792 		int  update_flags = 0;
793 
794 		if (isdot_or_dotdot == 0) {
795 			if (vp->v_name == NULL) {
796 				update_flags |= VNODE_UPDATE_NAME;
797 			}
798 			if (dvp != NULLVP && vp->v_parent == NULLVP) {
799 				update_flags |= VNODE_UPDATE_PARENT;
800 			}
801 
802 			if (update_flags) {
803 				vnode_update_identity(vp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags);
804 			}
805 		}
806 	}
807 	if ((cnp->cn_flags & MAKEENTRY) && (vp->v_flag & VNCACHEABLE) && LIST_FIRST(&vp->v_nclinks) == NULL) {
808 		/*
809 		 * missing from name cache, but should
810 		 * be in it... this can happen if volfs
811 		 * causes the vnode to be created or the
812 		 * name cache entry got recycled but the
813 		 * vnode didn't...
814 		 * check to make sure that ni_dvp is valid
815 		 * cache_lookup_path may return a NULL
816 		 * do a quick check to see if the generation of the
817 		 * directory matches our snapshot... this will get
818 		 * rechecked behind the name cache lock, but if it
819 		 * already fails to match, no need to go any further
820 		 */
821 		if (dvp != NULLVP && (nc_generation == dvp->v_nc_generation) && (!isdot_or_dotdot)) {
822 			cache_enter_with_gen(dvp, vp, cnp, nc_generation);
823 		}
824 	}
825 }
826 
#if NAMEDRSRCFORK
/*
 * Resolve the resource fork ("..namedfork/rsrc") of the regular file dp.
 *
 * Can change ni_dvp and ni_vp.  On success, returns with iocounts on stream vnode (always) and
 * data fork if requested.  On failure, returns with iocount data fork (always) and its parent directory
 * (if one was provided).
 *
 * dp          the file whose resource fork is wanted
 * wantparent  non-zero: dp becomes ni_dvp; zero: the iocount on dp is dropped
 *
 * Returns:	0		Success (on CREATE, ni_vp may be NULL)
 *		ENOENT		dp is not a regular file, or the fork does not exist
 *		EPERM		CN_ALLOWRSRCFORK is not set, or the operation is not
 *				DELETE, CREATE or LOOKUP
 *		vnode_getnamedstream:???
 */
static int
lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx)
{
	vnode_t svp = NULLVP;
	enum nsoperation nsop;
	int nsflags;
	int error;

	if (dp->v_type != VREG) {
		return ENOENT;
	}

	/* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
	if ((cnp->cn_flags & CN_ALLOWRSRCFORK) == 0) {
		return EPERM;
	}
	switch (cnp->cn_nameiop) {
	case DELETE:
		nsop = NS_DELETE;
		break;
	case CREATE:
		nsop = NS_CREATE;
		break;
	case LOOKUP:
		nsop = NS_OPEN;
		break;
	default:
		return EPERM;
	}

	nsflags = 0;
	if (cnp->cn_flags & CN_RAW_ENCRYPTED) {
		nsflags |= NS_GETRAWENCRYPTED;
	}

	/* Ask the file system for the resource fork. */
	error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, nsflags, ctx);

	/* During a create, it is OK for the stream vnode to be missing. */
	if (error == ENOATTR || error == ENOENT) {
		error = (nsop == NS_CREATE) ? 0 : ENOENT;
	}
	if (error) {
		return error;
	}
	/* The "parent" of the stream is the file. */
	if (wantparent) {
		if (ndp->ni_dvp) {
			vnode_put(ndp->ni_dvp);
		}
		ndp->ni_dvp = dp;
	} else {
		vnode_put(dp);
	}
	ndp->ni_vp = svp;  /* on create this may be null */

	/* Restore the truncated pathname buffer (for audits). */
	if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') {
		/*
		 * While we replaced only '/' with '\0' and would ordinarily
		 * need to just switch that back, the buffer in which we did
		 * this may not be what the pathname buffer is now when symlinks
		 * are involved. If we just restore the "/" we will make the
		 * string not terminated anymore, so be safe and restore the
		 * entire suffix.
		 *
		 * The count includes the terminating NUL of the literal, so the
		 * copy leaves the buffer terminated.
		 */
		strncpy(ndp->ni_next, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC));
		cnp->cn_nameptr = ndp->ni_next + 1;
		cnp->cn_namelen = sizeof(_PATH_RSRCFORKSPEC) - 1;
		ndp->ni_next += cnp->cn_namelen;
		if (ndp->ni_next[0] != '\0') {
			panic("Incorrect termination of path in %s", __FUNCTION__);
		}
	}
	cnp->cn_flags  &= ~MAKEENTRY;

	return 0;
}
#endif /* NAMEDRSRCFORK */
927 
928 /*
929  * iocounts in:
930  *      --One on ni_vp.  One on ni_dvp if there is more path, or we didn't come through the
931  *      cache, or we came through the cache and the caller doesn't want the parent.
932  *
933  * iocounts out:
934  *	--Leaves us in the correct state for the next step, whatever that might be.
935  *	--If we find a symlink, returns with iocounts on both ni_vp and ni_dvp.
936  *	--If we are to look up another component, then we have an iocount on ni_vp and
937  *	nothing else.
938  *	--If we are done, returns an iocount on ni_vp, and possibly on ni_dvp depending on nameidata flags.
939  *	--In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount
940  *	was dropped).
941  */
942 static int
lookup_handle_found_vnode(struct nameidata * ndp,struct componentname * cnp,int rdonly,int vbusyflags,int * keep_going,int nc_generation,int wantparent,int atroot,vfs_context_t ctx)943 lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
944     int vbusyflags, int *keep_going, int nc_generation,
945     int wantparent, int atroot, vfs_context_t ctx)
946 {
947 	vnode_t dp;
948 	int error;
949 	char *cp;
950 
951 	dp = ndp->ni_vp;
952 	*keep_going = 0;
953 
954 	if (ndp->ni_vp == NULLVP) {
955 		panic("NULL ni_vp in %s", __FUNCTION__);
956 	}
957 
958 	if (atroot) {
959 		goto nextname;
960 	}
961 
962 	/*
963 	 * Take into account any additional components consumed by
964 	 * the underlying filesystem.
965 	 */
966 	if (cnp->cn_consume > 0) {
967 		cnp->cn_nameptr += cnp->cn_consume;
968 		ndp->ni_next += cnp->cn_consume;
969 		ndp->ni_pathlen -= cnp->cn_consume;
970 		cnp->cn_consume = 0;
971 	} else {
972 		lookup_consider_update_cache(ndp->ni_dvp, dp, cnp, nc_generation);
973 	}
974 
975 	/*
976 	 * Check to see if the vnode has been mounted on...
977 	 * if so find the root of the mounted file system.
978 	 * Updates ndp->ni_vp.
979 	 */
980 	error = lookup_traverse_mountpoints(ndp, cnp, dp, vbusyflags, ctx);
981 	dp = ndp->ni_vp;
982 	if (error) {
983 		goto out;
984 	}
985 
986 #if CONFIG_MACF
987 	if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) {
988 		error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx);
989 		if (error) {
990 			goto out;
991 		}
992 	}
993 #endif
994 
995 	/*
996 	 * Check for symbolic link
997 	 */
998 	if ((dp->v_type == VLNK) &&
999 	    ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
1000 		cnp->cn_flags |= ISSYMLINK;
1001 		*keep_going = 1;
1002 		return 0;
1003 	}
1004 
1005 	/*
1006 	 * Check for bogus trailing slashes.
1007 	 */
1008 	if ((ndp->ni_flag & NAMEI_TRAILINGSLASH)) {
1009 		if (dp->v_type != VDIR) {
1010 			error = ENOTDIR;
1011 			goto out;
1012 		}
1013 		ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
1014 	}
1015 
1016 #if NAMEDSTREAMS
1017 	/*
1018 	 * Deny namei/lookup requests to resolve paths that point to shadow files.
1019 	 * Access to shadow files must be conducted by explicit calls to VNOP_LOOKUP
1020 	 * directly, and not use lookup/namei
1021 	 */
1022 	if (vnode_isshadow(dp)) {
1023 		error = ENOENT;
1024 		goto out;
1025 	}
1026 #endif
1027 
1028 nextname:
1029 	/*
1030 	 * Not a symbolic link.  If more pathname,
1031 	 * continue at next component, else return.
1032 	 *
1033 	 * Definitely have a dvp if there's another slash
1034 	 */
1035 	if (*ndp->ni_next == '/') {
1036 		cnp->cn_nameptr = ndp->ni_next + 1;
1037 		ndp->ni_pathlen--;
1038 		while (*cnp->cn_nameptr == '/') {
1039 			cnp->cn_nameptr++;
1040 			ndp->ni_pathlen--;
1041 		}
1042 
1043 		cp = cnp->cn_nameptr;
1044 		vnode_put(ndp->ni_dvp);
1045 		ndp->ni_dvp = NULLVP;
1046 
1047 		if (*cp == '\0') {
1048 			goto emptyname;
1049 		}
1050 
1051 		*keep_going = 1;
1052 		return 0;
1053 	}
1054 
1055 	/*
1056 	 * Disallow directory write attempts on read-only file systems.
1057 	 */
1058 	if (rdonly &&
1059 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
1060 		error = EROFS;
1061 		goto out;
1062 	}
1063 
1064 	/* If SAVESTART is set, we should have a dvp */
1065 	if (cnp->cn_flags & SAVESTART) {
1066 		/*
1067 		 * note that we already hold a reference
1068 		 * on both dp and ni_dvp, but for some reason
1069 		 * can't get another one... in this case we
1070 		 * need to do vnode_put on dp in 'bad2'
1071 		 */
1072 		if ((vnode_get(ndp->ni_dvp))) {
1073 			error = ENOENT;
1074 			goto out;
1075 		}
1076 		ndp->ni_startdir = ndp->ni_dvp;
1077 	}
1078 	if (!wantparent && ndp->ni_dvp) {
1079 		vnode_put(ndp->ni_dvp);
1080 		ndp->ni_dvp = NULLVP;
1081 	}
1082 
1083 	if (cnp->cn_flags & AUDITVNPATH1) {
1084 		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
1085 	} else if (cnp->cn_flags & AUDITVNPATH2) {
1086 		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
1087 	}
1088 
1089 #if NAMEDRSRCFORK
1090 	/*
1091 	 * Caller wants the resource fork.
1092 	 */
1093 	if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) {
1094 		error = lookup_handle_rsrc_fork(dp, ndp, cnp, wantparent, ctx);
1095 		if (error != 0) {
1096 			goto out;
1097 		}
1098 
1099 		dp = ndp->ni_vp;
1100 	}
1101 #endif
1102 	if (kdebug_enable) {
1103 		kdebug_lookup(ndp->ni_vp, cnp);
1104 	}
1105 
1106 	return 0;
1107 
1108 emptyname:
1109 	error = lookup_handle_emptyname(ndp, cnp, wantparent);
1110 	if (error != 0) {
1111 		goto out;
1112 	}
1113 
1114 	return 0;
1115 out:
1116 	return error;
1117 }
1118 
1119 /*
1120  * Comes in iocount on ni_vp.  May overwrite ni_dvp, but doesn't interpret incoming value.
1121  */
1122 static int
lookup_handle_emptyname(struct nameidata * ndp,struct componentname * cnp,int wantparent)1123 lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent)
1124 {
1125 	vnode_t dp;
1126 	int error = 0;
1127 
1128 	dp = ndp->ni_vp;
1129 	cnp->cn_namelen = 0;
1130 	/*
1131 	 * A degenerate name (e.g. / or "") which is a way of
1132 	 * talking about a directory, e.g. like "/." or ".".
1133 	 */
1134 	if (dp->v_type != VDIR) {
1135 		error = ENOTDIR;
1136 		goto out;
1137 	}
1138 	if (cnp->cn_nameiop == CREATE && dp == rootvnode) {
1139 		error = EEXIST;
1140 		goto out;
1141 	}
1142 	if (cnp->cn_nameiop != LOOKUP) {
1143 		error = EISDIR;
1144 		goto out;
1145 	}
1146 	if (wantparent) {
1147 		/*
1148 		 * note that we already hold a reference
1149 		 * on dp, but for some reason can't
1150 		 * get another one... in this case we
1151 		 * need to do vnode_put on dp in 'bad'
1152 		 */
1153 		if ((vnode_get(dp))) {
1154 			error = ENOENT;
1155 			goto out;
1156 		}
1157 		ndp->ni_dvp = dp;
1158 	}
1159 	cnp->cn_flags &= ~ISDOTDOT;
1160 	cnp->cn_flags |= ISLASTCN;
1161 	ndp->ni_next = cnp->cn_nameptr;
1162 	ndp->ni_vp = dp;
1163 
1164 	if (cnp->cn_flags & AUDITVNPATH1) {
1165 		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
1166 	} else if (cnp->cn_flags & AUDITVNPATH2) {
1167 		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
1168 	}
1169 	if (cnp->cn_flags & SAVESTART) {
1170 		panic("lookup: SAVESTART");
1171 	}
1172 
1173 	return 0;
1174 out:
1175 	return error;
1176 }
1177 /*
1178  * Search a pathname.
1179  * This is a very central and rather complicated routine.
1180  *
1181  * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
1182  * The starting directory is taken from ni_startdir. The pathname is
1183  * descended until done, or a symbolic link is encountered. The variable
1184  * ni_more is clear if the path is completed; it is set to one if a
1185  * symbolic link needing interpretation is encountered.
1186  *
1187  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
1188  * whether the name is to be looked up, created, renamed, or deleted.
1189  * When CREATE, RENAME, or DELETE is specified, information usable in
1190  * creating, renaming, or deleting a directory entry may be calculated.
1191  * If flag has LOCKPARENT or'ed into it, the parent directory is returned
1192  * locked. If flag has WANTPARENT or'ed into it, the parent directory is
1193  * returned unlocked. Otherwise the parent directory is not returned. If
1194  * the target of the pathname exists and LOCKLEAF is or'ed into the flag
1195  * the target is returned locked, otherwise it is returned unlocked.
1196  * When creating or renaming and LOCKPARENT is specified, the target may not
1197  * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
1198  *
1199  * Overall outline of lookup:
1200  *
1201  * dirloop:
1202  *	identify next component of name at ndp->ni_ptr
1203  *	handle degenerate case where name is null string
1204  *	if .. and crossing mount points and on mounted filesys, find parent
1205  *	call VNOP_LOOKUP routine for next component name
1206  *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
1207  *	    component vnode returned in ni_vp (if it exists), locked.
1208  *	if result vnode is mounted on and crossing mount points,
1209  *	    find mounted on vnode
1210  *	if more components of name, do next level at dirloop
1211  *	return the answer in ni_vp, locked if LOCKLEAF set
1212  *	    if LOCKPARENT set, return locked parent in ni_dvp
1213  *	    if WANTPARENT set, return unlocked parent in ni_dvp
1214  *
1215  * Returns:	0			Success
1216  *		ENOENT			No such file or directory
1217  *		EBADF			Bad file descriptor
1218  *		ENOTDIR			Not a directory
1219  *		EROFS			Read-only file system [CREATE]
1220  *		EISDIR			Is a directory [CREATE]
1221  *		cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
1222  *		vnode_authorize:EROFS
1223  *		vnode_authorize:EACCES
1224  *		vnode_authorize:EPERM
1225  *		vnode_authorize:???
1226  *		VNOP_LOOKUP:ENOENT	No such file or directory
1227  *		VNOP_LOOKUP:EJUSTRETURN	Restart system call (INTERNAL)
1228  *		VNOP_LOOKUP:???
1229  *		VFS_ROOT:ENOTSUP
1230  *		VFS_ROOT:ENOENT
1231  *		VFS_ROOT:???
1232  */
1233 int
lookup(struct nameidata * ndp)1234 lookup(struct nameidata *ndp)
1235 {
1236 	char    *cp;            /* pointer into pathname argument */
1237 	vnode_t         tdp;            /* saved dp */
1238 	vnode_t         dp;             /* the directory we are searching */
1239 	int docache = 1;                /* == 0 do not cache last component */
1240 	int wantparent;                 /* 1 => wantparent or lockparent flag */
1241 	int rdonly;                     /* lookup read-only flag bit */
1242 	int dp_authorized = 0;
1243 	int error = 0;
1244 	struct componentname *cnp = &ndp->ni_cnd;
1245 	vfs_context_t ctx = cnp->cn_context;
1246 	int vbusyflags = 0;
1247 	int nc_generation = 0;
1248 	vnode_t last_dp = NULLVP;
1249 	int keep_going;
1250 	int atroot;
1251 
1252 	/*
1253 	 * Setup: break out flag bits into variables.
1254 	 */
1255 	if (cnp->cn_flags & NOCACHE) {
1256 		docache = 0;
1257 	}
1258 	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
1259 	rdonly = cnp->cn_flags & RDONLY;
1260 	cnp->cn_flags &= ~ISSYMLINK;
1261 	cnp->cn_consume = 0;
1262 
1263 	dp = ndp->ni_startdir;
1264 	ndp->ni_startdir = NULLVP;
1265 
1266 	if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) {
1267 		vbusyflags = LK_NOWAIT;
1268 	}
1269 	cp = cnp->cn_nameptr;
1270 
1271 	if (*cp == '\0') {
1272 		if ((vnode_getwithref(dp))) {
1273 			dp = NULLVP;
1274 			error = ENOENT;
1275 			goto bad;
1276 		}
1277 		ndp->ni_vp = dp;
1278 		error = lookup_handle_emptyname(ndp, cnp, wantparent);
1279 		if (error) {
1280 			goto bad;
1281 		}
1282 
1283 		return 0;
1284 	}
1285 dirloop:
1286 	atroot = 0;
1287 	ndp->ni_vp = NULLVP;
1288 
1289 	if ((error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp))) {
1290 		dp = NULLVP;
1291 		goto bad;
1292 	}
1293 	if ((cnp->cn_flags & ISLASTCN)) {
1294 		if (docache) {
1295 			cnp->cn_flags |= MAKEENTRY;
1296 		}
1297 	} else {
1298 		cnp->cn_flags |= MAKEENTRY;
1299 	}
1300 
1301 	dp = ndp->ni_dvp;
1302 
1303 	if (ndp->ni_vp != NULLVP) {
1304 		/*
1305 		 * cache_lookup_path returned a non-NULL ni_vp then,
1306 		 * we're guaranteed that the dp is a VDIR, it's
1307 		 * been authorized, and vp is not ".."
1308 		 *
1309 		 * make sure we don't try to enter the name back into
1310 		 * the cache if this vp is purged before we get to that
1311 		 * check since we won't have serialized behind whatever
1312 		 * activity is occurring in the FS that caused the purge
1313 		 */
1314 		if (dp != NULLVP) {
1315 			nc_generation = dp->v_nc_generation - 1;
1316 		}
1317 
1318 		goto returned_from_lookup_path;
1319 	}
1320 
1321 	/*
1322 	 * Handle "..": three special cases.
1323 	 * 1. if at starting directory (e.g. the cwd/usedvp)
1324 	 *    and RESOLVE_BENEATH, then return EACCES.
1325 	 * 2. If at root directory (e.g. after chroot)
1326 	 *    or at absolute root directory
1327 	 *    then ignore it so can't get out.
1328 	 * 3. If this vnode is the root of a mounted
1329 	 *    filesystem, then replace it with the
1330 	 *    vnode which was mounted on so we take the
1331 	 *    .. in the other file system.
1332 	 */
1333 	if ((cnp->cn_flags & ISDOTDOT)) {
1334 		/* if dp is the starting directory and RESOLVE_BENEATH, we should return EACCES */
1335 		if ((ndp->ni_flag & NAMEI_RESOLVE_BENEATH) && (dp == ndp->ni_usedvp)) {
1336 			error = EACCES;
1337 			goto bad;
1338 		}
1339 		/*
1340 		 * if this is a chroot'ed process, check if the current
1341 		 * directory is still a subdirectory of the process's
1342 		 * root directory.
1343 		 */
1344 		if (ndp->ni_rootdir && (ndp->ni_rootdir != rootvnode) &&
1345 		    dp != ndp->ni_rootdir) {
1346 			int sdir_error;
1347 			int is_subdir = FALSE;
1348 
1349 			sdir_error = vnode_issubdir(dp, ndp->ni_rootdir,
1350 			    &is_subdir, vfs_context_kernel());
1351 
1352 			/*
1353 			 * If we couldn't determine if dp is a subdirectory of
1354 			 * ndp->ni_rootdir (sdir_error != 0), we let the request
1355 			 * proceed.
1356 			 */
1357 			if (!sdir_error && !is_subdir) {
1358 				vnode_put(dp);
1359 				dp = ndp->ni_rootdir;
1360 				/*
1361 				 * There's a ref on the process's root directory
1362 				 * but we can't use vnode_getwithref here as
1363 				 * there is nothing preventing that ref being
1364 				 * released by another thread.
1365 				 */
1366 				if (vnode_get(dp)) {
1367 					dp = NULLVP;
1368 					error = ENOENT;
1369 					goto bad;
1370 				}
1371 			}
1372 		}
1373 
1374 		for (;;) {
1375 			if (dp == ndp->ni_rootdir || dp == rootvnode) {
1376 				ndp->ni_dvp = dp;
1377 				ndp->ni_vp = dp;
1378 				/*
1379 				 * we're pinned at the root
1380 				 * we've already got one reference on 'dp'
1381 				 * courtesy of cache_lookup_path... take
1382 				 * another one for the ".."
1383 				 * if we fail to get the new reference, we'll
1384 				 * drop our original down in 'bad'
1385 				 */
1386 				if (vnode_get(dp)) {
1387 					error = ENOENT;
1388 					goto bad;
1389 				}
1390 				atroot = 1;
1391 				goto returned_from_lookup_path;
1392 			}
1393 			if ((dp->v_flag & VROOT) == 0 ||
1394 			    (cnp->cn_flags & NOCROSSMOUNT)) {
1395 				break;
1396 			}
1397 			if (dp->v_mount == NULL) {      /* forced umount */
1398 				error = EBADF;
1399 				goto bad;
1400 			}
1401 			tdp = dp;
1402 			dp = tdp->v_mount->mnt_vnodecovered;
1403 
1404 			if ((vnode_getwithref(dp))) {
1405 				vnode_put(tdp);
1406 				dp = NULLVP;
1407 				error = ENOENT;
1408 				goto bad;
1409 			}
1410 
1411 			vnode_put(tdp);
1412 
1413 			ndp->ni_dvp = dp;
1414 			dp_authorized = 0;
1415 		}
1416 	}
1417 
1418 	/*
1419 	 * We now have a segment name to search for, and a directory to search.
1420 	 */
1421 #if CONFIG_UNION_MOUNTS
1422 unionlookup:
1423 #endif /* CONFIG_UNION_MOUNTS */
1424 	ndp->ni_vp = NULLVP;
1425 
1426 	if (dp->v_type != VDIR) {
1427 		error = ENOTDIR;
1428 		goto lookup_error;
1429 	}
1430 	if ((cnp->cn_flags & DONOTAUTH) != DONOTAUTH) {
1431 		error = lookup_authorize_search(dp, cnp, dp_authorized, ctx);
1432 		if (error) {
1433 			goto lookup_error;
1434 		}
1435 	}
1436 
1437 	/*
1438 	 * Now that we've authorized a lookup, can bail out if the filesystem
1439 	 * will be doing a batched operation.  Return an iocount on dvp.
1440 	 */
1441 #if NAMEDRSRCFORK
1442 	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) {
1443 #else
1444 	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) {
1445 #endif /* NAMEDRSRCFORK */
1446 		ndp->ni_flag |= NAMEI_UNFINISHED;
1447 		ndp->ni_ncgeneration = dp->v_nc_generation;
1448 		return 0;
1449 	}
1450 
1451 	nc_generation = dp->v_nc_generation;
1452 
1453 	/*
1454 	 * Note:
1455 	 * Filesystems that support hardlinks may want to call vnode_update_identity
1456 	 * if the lookup operation below will modify the in-core vnode to belong to a new point
1457 	 * in the namespace.  VFS cannot infer whether or not the look up operation makes the vnode
1458 	 * name change or change parents.  Without this, the lookup may make update
1459 	 * filesystem-specific in-core metadata but fail to update the v_parent or v_name
1460 	 * fields in the vnode.  If VFS were to do this, it would be necessary to call
1461 	 * vnode_update_identity on every lookup operation -- expensive!
1462 	 *
1463 	 * However, even with this in place, multiple lookups may occur in between this lookup
1464 	 * and the subsequent vnop, so, at best, we could only guarantee that you would get a
1465 	 * valid path back, and not necessarily the one that you wanted.
1466 	 *
1467 	 * Example:
1468 	 * /tmp/a == /foo/b
1469 	 *
1470 	 * If you are now looking up /foo/b and the vnode for this link represents /tmp/a,
1471 	 * vnode_update_identity will fix the parentage so that you can get /foo/b back
1472 	 * through the v_parent chain (preventing you from getting /tmp/b back). It would
1473 	 * not fix whether or not you should or should not get /tmp/a vs. /foo/b.
1474 	 */
1475 
1476 	error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx);
1477 
1478 	if (error) {
1479 lookup_error:
1480 #if CONFIG_UNION_MOUNTS
1481 		if ((error == ENOENT) &&
1482 		    (dp->v_mount != NULL) &&
1483 		    (dp->v_mount->mnt_flag & MNT_UNION)) {
1484 			tdp = dp;
1485 			error = lookup_traverse_union(tdp, &dp, ctx);
1486 			vnode_put(tdp);
1487 			if (error) {
1488 				dp = NULLVP;
1489 				goto bad;
1490 			}
1491 
1492 			ndp->ni_dvp = dp;
1493 			dp_authorized = 0;
1494 			goto unionlookup;
1495 		}
1496 #endif /* CONFIG_UNION_MOUNTS */
1497 
1498 		if (error != EJUSTRETURN) {
1499 			goto bad;
1500 		}
1501 
1502 		if (ndp->ni_vp != NULLVP) {
1503 			panic("leaf should be empty");
1504 		}
1505 
1506 #if NAMEDRSRCFORK
1507 		/*
1508 		 * At this point, error should be EJUSTRETURN.
1509 		 *
1510 		 * If CN_WANTSRSRCFORK is set, that implies that the
1511 		 * underlying filesystem could not find the "parent" of the
1512 		 * resource fork (the data fork), and we are doing a lookup
1513 		 * for a CREATE event.
1514 		 *
1515 		 * However, this should be converted to an error, as the
1516 		 * failure to find this parent should disallow further
1517 		 * progress to try and acquire a resource fork vnode.
1518 		 */
1519 		if (cnp->cn_flags & CN_WANTSRSRCFORK) {
1520 			error = ENOENT;
1521 			goto bad;
1522 		}
1523 #endif
1524 
1525 		error = lookup_validate_creation_path(ndp);
1526 		if (error) {
1527 			goto bad;
1528 		}
1529 		/*
1530 		 * We return with ni_vp NULL to indicate that the entry
1531 		 * doesn't currently exist, leaving a pointer to the
1532 		 * referenced directory vnode in ndp->ni_dvp.
1533 		 */
1534 		if (cnp->cn_flags & SAVESTART) {
1535 			if ((vnode_get(ndp->ni_dvp))) {
1536 				error = ENOENT;
1537 				goto bad;
1538 			}
1539 			ndp->ni_startdir = ndp->ni_dvp;
1540 		}
1541 		if (!wantparent) {
1542 			vnode_put(ndp->ni_dvp);
1543 		}
1544 
1545 		if (kdebug_enable) {
1546 			kdebug_lookup(ndp->ni_dvp, cnp);
1547 		}
1548 		return 0;
1549 	}
1550 returned_from_lookup_path:
1551 	/* We'll always have an iocount on ni_vp when this finishes. */
1552 	error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx);
1553 	if (error != 0) {
1554 		goto bad2;
1555 	}
1556 
1557 	if (keep_going) {
1558 		dp = ndp->ni_vp;
1559 
1560 		/* namei() will handle symlinks */
1561 		if ((dp->v_type == VLNK) &&
1562 		    ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
1563 			return 0;
1564 		}
1565 
1566 		/*
1567 		 * Otherwise, there's more path to process.
1568 		 * cache_lookup_path is now responsible for dropping io ref on dp
1569 		 * when it is called again in the dirloop.  This ensures we hold
1570 		 * a ref on dp until we complete the next round of lookup.
1571 		 */
1572 		last_dp = dp;
1573 
1574 		goto dirloop;
1575 	}
1576 
1577 	return 0;
1578 bad2:
1579 	if (ndp->ni_dvp) {
1580 		vnode_put(ndp->ni_dvp);
1581 	}
1582 
1583 	vnode_put(ndp->ni_vp);
1584 	ndp->ni_vp = NULLVP;
1585 
1586 	if (kdebug_enable) {
1587 		kdebug_lookup(dp, cnp);
1588 	}
1589 	return error;
1590 
1591 bad:
1592 	if (dp) {
1593 		vnode_put(dp);
1594 	}
1595 	ndp->ni_vp = NULLVP;
1596 
1597 	if (kdebug_enable) {
1598 		kdebug_lookup(dp, cnp);
1599 	}
1600 	return error;
1601 }
1602 
#if CONFIG_UNION_MOUNTS
/*
 * Given a vnode in a union mount, traverse to the equivalent
 * vnode in the underlying mount.
 *
 * On success *new_dvp holds the equivalent vnode (the iocount comes from
 * vnode_getwithref() or from namei()).  On any failure *new_dvp is NULL,
 * so a caller can never pick up a vnode it holds no iocount on.
 * The iocount on dvp itself is not touched here.
 *
 * Returns:	0		Success
 *		ENOENT		Covered vnode could not be referenced, or a
 *				vnode on the v_parent chain has no name
 *		ENAMETOOLONG	Rebuilt path does not fit in MAXPATHLEN
 *		namei:???
 */
int
lookup_traverse_union(vnode_t dvp, vnode_t *new_dvp, vfs_context_t ctx)
{
	char *path = NULL, *pp;
	const char *name, *np;
	size_t len;
	int error = 0;
	struct nameidata nd;
	vnode_t vp = dvp;

	*new_dvp = NULL;

	if (vp && vp->v_flag & VROOT) {
		/*
		 * Root of the union layer: the equivalent vnode is simply
		 * the covered vnode.  Publish it only once the iocount has
		 * actually been obtained.
		 */
		vnode_t covered_vp = vp->v_mount->mnt_vnodecovered;

		if (vnode_getwithref(covered_vp)) {
			return ENOENT;
		}
		*new_dvp = covered_vp;
		return 0;
	}

	path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);

	/*
	 * Walk back up to the mountpoint following the
	 * v_parent chain and build a slash-separated path.
	 * Then lookup that path starting with the covered vnode.
	 * The path is assembled right-to-left from the end of the buffer.
	 */
	pp = path + (MAXPATHLEN - 1);
	*pp = '\0';

	while (1) {
		name = vnode_getname(vp);
		if (name == NULL) {
			printf("lookup_traverse_union: null parent name: .%s\n", pp);
			error = ENOENT;
			goto done;
		}
		len = strlen(name);
		if ((len + 1) > (size_t)(pp - path)) {          // Enough space for this name ?
			error = ENAMETOOLONG;
			vnode_putname(name);
			goto done;
		}
		for (np = name + len; len > 0; len--) { // Copy name backwards
			*--pp = *--np;
		}
		vnode_putname(name);
		vp = vp->v_parent;
		if (vp == NULLVP || vp->v_flag & VROOT) {
			break;
		}
		*--pp = '/';
	}

	/* Evaluate the path in the underlying mount */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(pp), ctx);
	nd.ni_dvp = dvp->v_mount->mnt_vnodecovered;
	error = namei(&nd);
	if (error == 0) {
		*new_dvp = nd.ni_vp;
	}
	nameidone(&nd);
done:
	if (path) {
		zfree(ZV_NAMEI, path);
	}
	return error;
}
#endif /* CONFIG_UNION_MOUNTS */
1677 
1678 int
1679 lookup_validate_creation_path(struct nameidata *ndp)
1680 {
1681 	struct componentname *cnp = &ndp->ni_cnd;
1682 
1683 	/*
1684 	 * If creating and at end of pathname, then can consider
1685 	 * allowing file to be created.
1686 	 */
1687 	if (cnp->cn_flags & RDONLY) {
1688 		return EROFS;
1689 	}
1690 	if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) {
1691 		return ENOENT;
1692 	}
1693 
1694 	return 0;
1695 }
1696 
1697 /*
1698  * Modifies only ni_vp.  Always returns with ni_vp still valid (iocount held).
1699  */
1700 static int
1701 lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
1702     int vbusyflags, vfs_context_t ctx)
1703 {
1704 	mount_t mp;
1705 	vnode_t tdp;
1706 	int error = 0;
1707 	uint32_t depth = 0;
1708 	vnode_t mounted_on_dp;
1709 	int current_mount_generation = 0;
1710 #if CONFIG_TRIGGERS
1711 	vnode_t triggered_dp = NULLVP;
1712 	int retry_cnt = 0;
1713 #define MAX_TRIGGER_RETRIES 1
1714 #endif
1715 
1716 	if (dp->v_type != VDIR || cnp->cn_flags & NOCROSSMOUNT) {
1717 		return 0;
1718 	}
1719 
1720 	mounted_on_dp = dp;
1721 #if CONFIG_TRIGGERS
1722 restart:
1723 #endif
1724 	current_mount_generation = mount_generation;
1725 
1726 	while (dp->v_mountedhere) {
1727 		vnode_lock_spin(dp);
1728 		if ((mp = dp->v_mountedhere)) {
1729 			mp->mnt_crossref++;
1730 			vnode_unlock(dp);
1731 		} else {
1732 			vnode_unlock(dp);
1733 			break;
1734 		}
1735 
1736 		if (ISSET(mp->mnt_lflag, MNT_LFORCE)) {
1737 			mount_dropcrossref(mp, dp, 0);
1738 			break;  // don't traverse into a forced unmount
1739 		}
1740 
1741 
1742 		if (vfs_busy(mp, vbusyflags)) {
1743 			mount_dropcrossref(mp, dp, 0);
1744 			if (vbusyflags == LK_NOWAIT) {
1745 				error = ENOENT;
1746 				goto out;
1747 			}
1748 
1749 			continue;
1750 		}
1751 
1752 		error = VFS_ROOT(mp, &tdp, ctx);
1753 
1754 		mount_dropcrossref(mp, dp, 0);
1755 		vfs_unbusy(mp);
1756 
1757 		if (error) {
1758 			goto out;
1759 		}
1760 
1761 		vnode_put(dp);
1762 		ndp->ni_vp = dp = tdp;
1763 		if (dp->v_type != VDIR) {
1764 #if DEVELOPMENT || DEBUG
1765 			panic("%s : Root of filesystem not a directory",
1766 			    __FUNCTION__);
1767 #else
1768 			break;
1769 #endif
1770 		}
1771 		depth++;
1772 	}
1773 
1774 #if CONFIG_TRIGGERS
1775 	/*
1776 	 * The triggered_dp check here is required but is susceptible to a
1777 	 * (unlikely) race in which trigger mount is done from here and is
1778 	 * unmounted before we get past vfs_busy above. We retry to deal with
1779 	 * that case but it has the side effect of unwanted retries for
1780 	 * "special" processes which don't want to trigger mounts.
1781 	 */
1782 	if (dp->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) {
1783 		error = vnode_trigger_resolve(dp, ndp, ctx);
1784 		if (error) {
1785 			goto out;
1786 		}
1787 		if (dp == triggered_dp) {
1788 			retry_cnt += 1;
1789 		} else {
1790 			retry_cnt = 0;
1791 		}
1792 		triggered_dp = dp;
1793 		goto restart;
1794 	}
1795 #endif /* CONFIG_TRIGGERS */
1796 
1797 	if (depth) {
1798 		mp = mounted_on_dp->v_mountedhere;
1799 
1800 		if (mp) {
1801 			mount_lock_spin(mp);
1802 			mp->mnt_realrootvp_vid = dp->v_id;
1803 			mp->mnt_realrootvp = dp;
1804 			mp->mnt_generation = current_mount_generation;
1805 			mount_unlock(mp);
1806 		}
1807 	}
1808 
1809 	return 0;
1810 
1811 out:
1812 	return error;
1813 }
1814 
1815 /*
1816  * Takes ni_vp and ni_dvp non-NULL.  Returns with *new_dp set to the location
1817  * at which to start a lookup with a resolved path, and all other iocounts dropped.
1818  */
1819 static int
1820 lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, bool *new_dp_has_iocount, vfs_context_t ctx)
1821 {
1822 	int error;
1823 	char *cp = NULL;               /* pointer into pathname argument */
1824 	u_int cplen = 0;
1825 	uio_t auio;
1826 	UIO_STACKBUF(uio_buf, 1);
1827 	int need_newpathbuf;
1828 	u_int linklen = 0;
1829 	struct componentname *cnp = &ndp->ni_cnd;
1830 	vnode_t dp;
1831 	char *tmppn;
1832 	u_int rsrclen = (cnp->cn_flags & CN_WANTSRSRCFORK) ? sizeof(_PATH_RSRCFORKSPEC) : 0;
1833 	bool dp_has_iocount = false;
1834 
1835 	if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
1836 		return ELOOP;
1837 	}
1838 #if CONFIG_MACF
1839 	if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0) {
1840 		return error;
1841 	}
1842 #endif /* MAC */
1843 	if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) {
1844 		need_newpathbuf = 1;
1845 	} else {
1846 		need_newpathbuf = 0;
1847 	}
1848 
1849 	if (need_newpathbuf) {
1850 		if (!(cnp->cn_flags & HASBUF) || cnp->cn_pnlen == MAXPATHLEN) {
1851 			cp = zalloc(ZV_NAMEI);
1852 			cplen = MAXPATHLEN;
1853 		} else {
1854 			assert(proc_support_long_paths(vfs_context_proc(ctx)));
1855 			cp = kalloc_data(cnp->cn_pnlen, Z_WAITOK | Z_ZERO);
1856 			cplen = cnp->cn_pnlen;
1857 		}
1858 	} else {
1859 		cp = cnp->cn_pnbuf;
1860 	}
1861 	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
1862 
1863 	uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);
1864 
1865 	error = VNOP_READLINK(ndp->ni_vp, auio, ctx);
1866 
1867 	if (!error) {
1868 		user_ssize_t resid = uio_resid(auio);
1869 
1870 		assert(resid <= MAXPATHLEN);
1871 
1872 		if (resid == MAXPATHLEN) {
1873 			linklen = 0;
1874 		} else {
1875 			/*
1876 			 * Safe to set unsigned with a [larger] signed type here
1877 			 * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN
1878 			 * is only 1024.
1879 			 */
1880 			linklen = (u_int)strnlen(cp, MAXPATHLEN - (u_int)resid);
1881 		}
1882 
1883 		size_t maxlen = proc_support_long_paths(vfs_context_proc(ctx)) ? MAXLONGPATHLEN : MAXPATHLEN;
1884 
1885 		if (linklen == 0) {
1886 			error = ENOENT;
1887 		} else if (linklen + ndp->ni_pathlen + rsrclen > maxlen) {
1888 			error = ENAMETOOLONG;
1889 		}
1890 	}
1891 
1892 	if (error) {
1893 		if (need_newpathbuf) {
1894 			if (cplen == MAXPATHLEN) {
1895 				zfree(ZV_NAMEI, cp);
1896 			} else {
1897 				kfree_data(cp, cplen);
1898 			}
1899 		}
1900 		return error;
1901 	}
1902 
1903 	if (need_newpathbuf) {
1904 		tmppn = cnp->cn_pnbuf;
1905 		u_int tmplen = cnp->cn_pnlen;
1906 		bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
1907 		cnp->cn_pnbuf = cp;
1908 		cnp->cn_pnlen = cplen;
1909 
1910 		if ((cnp->cn_flags & HASBUF)) {
1911 			if (tmplen == MAXPATHLEN) {
1912 				zfree(ZV_NAMEI, tmppn);
1913 			} else {
1914 				kfree_data(tmppn, tmplen);
1915 			}
1916 		} else {
1917 			cnp->cn_flags |= HASBUF;
1918 		}
1919 	} else {
1920 		cnp->cn_pnbuf[linklen] = '\0';
1921 	}
1922 
1923 	ndp->ni_pathlen += linklen;
1924 	cnp->cn_nameptr = cnp->cn_pnbuf;
1925 
1926 	/*
1927 	 * starting point for 'relative'
1928 	 * symbolic link path
1929 	 */
1930 	dp = ndp->ni_dvp;
1931 
1932 	/*
1933 	 * get rid of reference returned via 'lookup'
1934 	 * ni_dvp is released only if we restart at /.
1935 	 */
1936 	vnode_put(ndp->ni_vp);
1937 	ndp->ni_vp = NULLVP;
1938 	ndp->ni_dvp = NULLVP;
1939 
1940 	dp_has_iocount = true;
1941 
1942 	/*
1943 	 * Check if symbolic link restarts us at the root
1944 	 */
1945 	if (*(cnp->cn_nameptr) == '/') {
1946 		/* return EACCES if resolve beneath and the symlink restarts at root */
1947 		if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
1948 			vnode_put(dp); /* ALWAYS have a dvp for a symlink */
1949 			return EACCES;
1950 		}
1951 		while (*(cnp->cn_nameptr) == '/') {
1952 			cnp->cn_nameptr++;
1953 			ndp->ni_pathlen--;
1954 		}
1955 		if (linklen != 0) {
1956 			vnode_put(dp); /* ALWAYS have a dvp for a symlink */
1957 			dp_has_iocount = false;
1958 			if ((dp = ndp->ni_rootdir) == NULLVP) {
1959 				return ENOENT;
1960 			}
1961 		}
1962 	}
1963 
1964 	*new_dp = dp;
1965 	*new_dp_has_iocount = dp_has_iocount;
1966 
1967 	return 0;
1968 }
1969 
1970 /*
1971  * relookup - lookup a path name component
1972  *    Used by lookup to re-acquire things.
1973  */
1974 int
1975 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1976 {
1977 	struct vnode *dp = NULL;                /* the directory we are searching */
1978 	int wantparent;                 /* 1 => wantparent or lockparent flag */
1979 	int rdonly;                     /* lookup read-only flag bit */
1980 	int error = 0;
1981 #ifdef NAMEI_DIAGNOSTIC
1982 	int i, newhash;                 /* DEBUG: check name hash */
1983 	char *cp;                       /* DEBUG: check name ptr/len */
1984 #endif
1985 	vfs_context_t ctx = cnp->cn_context;
1986 
1987 	/*
1988 	 * Setup: break out flag bits into variables.
1989 	 */
1990 	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
1991 	rdonly = cnp->cn_flags & RDONLY;
1992 	cnp->cn_flags &= ~ISSYMLINK;
1993 
1994 	if (cnp->cn_flags & NOCACHE) {
1995 		cnp->cn_flags &= ~MAKEENTRY;
1996 	} else {
1997 		cnp->cn_flags |= MAKEENTRY;
1998 	}
1999 
2000 	dp = dvp;
2001 
2002 	/*
2003 	 * Check for degenerate name (e.g. / or "")
2004 	 * which is a way of talking about a directory,
2005 	 * e.g. like "/." or ".".
2006 	 */
2007 	if (cnp->cn_nameptr[0] == '\0') {
2008 		if (cnp->cn_nameiop != LOOKUP || wantparent) {
2009 			error = EISDIR;
2010 			goto bad;
2011 		}
2012 		if (dp->v_type != VDIR) {
2013 			error = ENOTDIR;
2014 			goto bad;
2015 		}
2016 		if ((vnode_get(dp))) {
2017 			error = ENOENT;
2018 			goto bad;
2019 		}
2020 		*vpp = dp;
2021 
2022 		if (cnp->cn_flags & SAVESTART) {
2023 			panic("lookup: SAVESTART");
2024 		}
2025 		return 0;
2026 	}
2027 	/*
2028 	 * We now have a segment name to search for, and a directory to search.
2029 	 */
2030 	if ((error = VNOP_LOOKUP(dp, vpp, cnp, ctx))) {
2031 		if (error != EJUSTRETURN) {
2032 			goto bad;
2033 		}
2034 #if DIAGNOSTIC
2035 		if (*vpp != NULL) {
2036 			panic("leaf should be empty");
2037 		}
2038 #endif
2039 		/*
2040 		 * If creating and at end of pathname, then can consider
2041 		 * allowing file to be created.
2042 		 */
2043 		if (rdonly) {
2044 			error = EROFS;
2045 			goto bad;
2046 		}
2047 		/*
2048 		 * We return with ni_vp NULL to indicate that the entry
2049 		 * doesn't currently exist, leaving a pointer to the
2050 		 * (possibly locked) directory inode in ndp->ni_dvp.
2051 		 */
2052 		return 0;
2053 	}
2054 	dp = *vpp;
2055 
2056 #if DIAGNOSTIC
2057 	/*
2058 	 * Check for symbolic link
2059 	 */
2060 	if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW)) {
2061 		panic("relookup: symlink found.");
2062 	}
2063 #endif
2064 
2065 	/*
2066 	 * Disallow directory write attempts on read-only file systems.
2067 	 */
2068 	if (rdonly &&
2069 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
2070 		error = EROFS;
2071 		goto bad2;
2072 	}
2073 	/* ASSERT(dvp == ndp->ni_startdir) */
2074 
2075 	return 0;
2076 
2077 bad2:
2078 	vnode_put(dp);
2079 bad:
2080 	*vpp = NULL;
2081 
2082 	return error;
2083 }
2084 
2085 /*
2086  * Free pathname buffer
2087  */
2088 void
2089 nameidone(struct nameidata *ndp)
2090 {
2091 	if (ndp->ni_cnd.cn_flags & HASBUF) {
2092 		char *tmp = ndp->ni_cnd.cn_pnbuf;
2093 
2094 		ndp->ni_cnd.cn_pnbuf = NULL;
2095 		ndp->ni_cnd.cn_flags &= ~HASBUF;
2096 		if (ndp->ni_cnd.cn_pnlen == MAXPATHLEN) {
2097 			zfree(ZV_NAMEI, tmp);
2098 		} else {
2099 			kfree_data(tmp, ndp->ni_cnd.cn_pnlen);
2100 		}
2101 	}
2102 }
2103 
2104 
2105 /*
2106  * Log (part of) a pathname using kdebug, as used by fs_usage.  The path up to
2107  * and including the current component name are logged.  Up to NUMPARMS * 4
2108  * bytes of pathname will be logged.  If the path to be logged is longer than
2109  * that, then the last NUMPARMS * 4 bytes are logged. That is, the truncation
2110  * removes the leading portion of the path.
2111  *
2112  * The logging is done via multiple KDBG_RELEASE calls.  The first one is marked
2113  * with DBG_FUNC_START.  The last one is marked with DBG_FUNC_END (in addition
2114  * to DBG_FUNC_START if it is also the first).  There may be intermediate ones
2115  * with neither DBG_FUNC_START nor DBG_FUNC_END.
2116  *
2117  * The first event passes the vnode pointer and 24 or 32 (on K32, 12 or 16)
2118  * bytes of pathname.  The remaining events add 32 (on K32, 16) bytes of
2119  * pathname each.  The minimum number of events required to pass the path are
2120  * used.  Any excess padding in the final event (because not all of the 24 or 32
2121  * (on K32, 12 or 16) bytes are needed for the remainder of the path) is set to
2122  * zero bytes, or '>' if there is more path beyond the current component name
2123  * (usually because an intermediate component was not found).
2124  *
2125  * NOTE: If the path length is greater than NUMPARMS * 4, or is not of the form
2126  * 24 + N * 32 (or on K32, 12 + N * 16), there will be no padding.
2127  */
2128 #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
2129 
2130 void
2131 kdebug_vfs_lookup(const char *path, size_t path_len, void *vnp,
2132     uint32_t flags)
2133 {
2134 	unsigned long path_words[4] = {};
2135 	size_t trace_len = MIN(sizeof(path_words) - sizeof(path_words[0]), path_len);
2136 	size_t path_next = 0;
2137 	bool noprocfilt = flags & KDBG_VFS_LOOKUP_FLAG_NOPROCFILT;
2138 
2139 	assert(path_len >= 0);
2140 
2141 	int code = ((flags & KDBG_VFS_LOOKUP_FLAG_LOOKUP) ? VFS_LOOKUP :
2142 	    VFS_LOOKUP_DONE) | DBG_FUNC_START;
2143 
2144 	if (path_len <= (3 * (int)sizeof(long))) {
2145 		code |= DBG_FUNC_END;
2146 	}
2147 	memcpy(path_words, path, trace_len);
2148 	path_next += trace_len;
2149 
2150 	if (noprocfilt) {
2151 		KDBG_RELEASE_NOPROCFILT(code, kdebug_vnode(vnp), path_words[0],
2152 		    path_words[1], path_words[2]);
2153 	} else {
2154 		KDBG_RELEASE(code, kdebug_vnode(vnp), path_words[0], path_words[1],
2155 		    path_words[2]);
2156 	}
2157 
2158 	code &= ~DBG_FUNC_START;
2159 
2160 	for (int i = 3; i * (int)sizeof(long) < path_len; i += 4) {
2161 		trace_len = sizeof(path_words);
2162 		if ((i + 4) * (int)sizeof(long) >= path_len) {
2163 			code |= DBG_FUNC_END;
2164 			trace_len = path_len - path_next;
2165 			memset(path_words, 0, sizeof(path_words));
2166 		}
2167 		memcpy(path_words, &path[path_next], trace_len);
2168 		path_next += trace_len;
2169 
2170 		if (noprocfilt) {
2171 			KDBG_RELEASE_NOPROCFILT(code, path_words[0], path_words[1],
2172 			    path_words[2], path_words[3]);
2173 		} else {
2174 			KDBG_RELEASE(code, path_words[0], path_words[1],
2175 			    path_words[2], path_words[3]);
2176 		}
2177 	}
2178 }
2179 
2180 void
2181 kdebug_lookup_gen_events(long *path_words, int path_len, void *vnp, bool lookup)
2182 {
2183 	assert(path_len >= 0);
2184 	kdebug_vfs_lookup((const char *)path_words, path_len, vnp,
2185 	    lookup ? KDBG_VFS_LOOKUP_FLAG_LOOKUP : 0);
2186 }
2187 
2188 void
2189 kdebug_lookup(vnode_t vnp, struct componentname *cnp)
2190 {
2191 	kdebug_vfs_lookup(cnp->cn_pnbuf, strnlen(cnp->cn_pnbuf, cnp->cn_pnlen), vnp, KDBG_VFS_LOOKUP_FLAG_LOOKUP);
2192 }
2193 
2194 #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
2195 
/*
 * Stub built when KDEBUG_LEVEL is below KDEBUG_LEVEL_IST: lookup
 * tracepoints are compiled out, so every argument is ignored.
 */
void
kdebug_vfs_lookup(const char *dbg_parms __unused, size_t dbg_namelen __unused,
    void *dp __unused, __unused kdebug_vfs_lookup_flags_t flags)
{
}
2201 
/*
 * Stub built when KDEBUG_LEVEL is below KDEBUG_LEVEL_IST: nothing is
 * logged for the vnode or component name.
 */
static void
kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused)
{
}
2206 #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
2207 
2208 int
2209 vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx)
2210 {
2211 	mount_t mp;
2212 	int error;
2213 
2214 	mp = mount_lookupby_volfsid(fsid->val[0], 1);
2215 	if (mp == NULL) {
2216 		return EINVAL;
2217 	}
2218 
2219 	/* Get the target vnode. */
2220 	if (ino == 2) {
2221 		error = VFS_ROOT(mp, vpp, ctx);
2222 	} else {
2223 		error = VFS_VGET(mp, ino, vpp, ctx);
2224 	}
2225 
2226 	vfs_unbusy(mp);
2227 	return error;
2228 }
2229 /*
2230  * Obtain the real path from a legacy volfs style path.
2231  *
2232  * Valid formats of input path:
2233  *
2234  *	"555/@"
2235  *	"555/2"
2236  *	"555/123456"
2237  *	"555/123456/foobar"
2238  *
2239  * Where:
2240  *	555 represents the volfs file system id
2241  *	'@' and '2' are aliases to the root of a file system
2242  *	123456 represents a file id
2243  *	"foobar" represents a file name
2244  */
2245 #if CONFIG_VOLFS
2246 static int
2247 vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx)
2248 {
2249 	vnode_t vp;
2250 	struct mount *mp = NULL;
2251 	char  *str;
2252 	char ch;
2253 	unsigned long id;
2254 	ino64_t ino;
2255 	int error;
2256 	int length;
2257 
2258 	/* Get file system id and move str to next component. */
2259 	id = strtoul(path, &str, 10);
2260 	if (id == 0 || str[0] != '/') {
2261 		return EINVAL;
2262 	}
2263 	while (*str == '/') {
2264 		str++;
2265 	}
2266 	ch = *str;
2267 
2268 	if (id > INT_MAX) {
2269 		return ENOENT;
2270 	}
2271 	mp = mount_lookupby_volfsid((int)id, 1);
2272 	if (mp == NULL) {
2273 		return EINVAL;  /* unexpected failure */
2274 	}
2275 	/* Check for an alias to a file system root. */
2276 	if (ch == '@' && str[1] == '\0') {
2277 		ino = 2;
2278 		str++;
2279 	} else {
2280 		/* Get file id and move str to next component. */
2281 		ino = strtouq(str, &str, 10);
2282 	}
2283 
2284 	/* Get the target vnode. */
2285 	if (ino == 2) {
2286 		struct vfs_attr vfsattr;
2287 		int use_vfs_root = TRUE;
2288 
2289 		VFSATTR_INIT(&vfsattr);
2290 		VFSATTR_WANTED(&vfsattr, f_capabilities);
2291 		if (vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
2292 		    VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
2293 			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
2294 			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
2295 				use_vfs_root = FALSE;
2296 			}
2297 		}
2298 
2299 		if (use_vfs_root) {
2300 			error = VFS_ROOT(mp, &vp, ctx);
2301 		} else {
2302 			error = VFS_VGET(mp, ino, &vp, ctx);
2303 		}
2304 	} else {
2305 		error = VFS_VGET(mp, ino, &vp, ctx);
2306 	}
2307 	vfs_unbusy(mp);
2308 	if (error) {
2309 		goto out;
2310 	}
2311 	realpath[0] = '\0';
2312 
2313 	/* Get the absolute path to this vnode. */
2314 	error = build_path(vp, realpath, (int)bufsize, &length, 0, ctx);
2315 	vnode_put(vp);
2316 
2317 	if (error == 0 && *str != '\0') {
2318 		size_t attempt = strlcat(realpath, str, MAXPATHLEN);
2319 		if (attempt > MAXPATHLEN) {
2320 			error = ENAMETOOLONG;
2321 		}
2322 	}
2323 out:
2324 	return error;
2325 }
2326 #endif
2327 
2328 void
2329 lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create)
2330 {
2331 	if (error == 0 && vp == NULLVP) {
2332 		panic("NULL vp with error == 0.");
2333 	}
2334 
2335 	/*
2336 	 * We don't want to do any of this if we didn't use the compound vnop
2337 	 * to perform the lookup... i.e. if we're allowing and using the legacy pattern,
2338 	 * where we did a full lookup.
2339 	 */
2340 	if ((ndp->ni_flag & NAMEI_COMPOUND_OP_MASK) == 0) {
2341 		return;
2342 	}
2343 
2344 	/*
2345 	 * If we're going to continue the lookup, we'll handle
2346 	 * all lookup-related updates at that time.
2347 	 */
2348 	if (error == EKEEPLOOKING) {
2349 		return;
2350 	}
2351 
2352 	/*
2353 	 * Only audit or update cache for *found* vnodes.  For creation
2354 	 * neither would happen in the non-compound-vnop case.
2355 	 */
2356 	if ((vp != NULLVP) && !did_create) {
2357 		/*
2358 		 * If MAKEENTRY isn't set, and we've done a successful compound VNOP,
2359 		 * then we certainly don't want to update cache or identity.
2360 		 */
2361 		if ((error != 0) || (ndp->ni_cnd.cn_flags & MAKEENTRY)) {
2362 			lookup_consider_update_cache(dvp, vp, &ndp->ni_cnd, ndp->ni_ncgeneration);
2363 		}
2364 		if (ndp->ni_cnd.cn_flags & AUDITVNPATH1) {
2365 			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2366 		} else if (ndp->ni_cnd.cn_flags & AUDITVNPATH2) {
2367 			AUDIT_ARG(vnpath, vp, ARG_VNODE2);
2368 		}
2369 	}
2370 
2371 	/*
2372 	 * If you created (whether you opened or not), cut a lookup tracepoint
2373 	 * for the parent dir (as would happen without a compound vnop).  Note: we may need
2374 	 * a vnode despite failure in this case!
2375 	 *
2376 	 * If you did not create:
2377 	 *      Found child (succeeded or not): cut a tracepoint for the child.
2378 	 *      Did not find child: cut a tracepoint with the parent.
2379 	 */
2380 	if (kdebug_enable) {
2381 		kdebug_lookup(vp ? vp : dvp, &ndp->ni_cnd);
2382 	}
2383 }
2384