1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/syslimits.h>
78 #include <sys/time.h>
79 #include <sys/namei.h>
80 #include <sys/vm.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/errno.h>
84 #include <kern/kalloc.h>
85 #include <sys/filedesc.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kdebug.h>
88 #include <sys/unistd.h> /* For _PC_NAME_MAX */
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/zalloc.h>
92 #include <security/audit/audit.h>
93 #if CONFIG_MACF
94 #include <security/mac_framework.h>
95 #endif
96 #include <os/atomic_private.h>
97
98 #include <sys/paths.h>
99
100 #if NAMEDRSRCFORK
101 #include <sys/xattr.h>
102 #endif
103 /*
104 * The minimum length of a volfs-style pathname is 9 characters.
105 * Example: "/.vol/1/2"
106 */
107 #define VOLFS_MIN_PATH_LEN 9
108
109
110 #if CONFIG_VOLFS
111 static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx);
/*
 * Upper bound on how many times namei() re-drives a lookup that failed
 * with ENOENT after a volfs path was translated to a full path.
 */
112 #define MAX_VOLFS_RESTARTS 5
113 #endif
114
/* Forward declarations of the file-local lookup helpers. */
115 static int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
116 static int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, bool* dp_has_iocount, vfs_context_t ctx);
117 static int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
118 static void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
119 static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
120 int vbusyflags, int *keep_going, int nc_generation,
121 int wantparent, int atroot, vfs_context_t ctx);
122 static int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
123
124 #if NAMEDRSRCFORK
125 static int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
126 #endif
127
/* Taken shared by namei() while it reads rootvnode and adjusts the root directory usecount. */
128 extern lck_rw_t rootvnode_rw_lock;
129
/*
 * Flags produced by lookup_check_for_resolve_prefix().
 *
 * RESOLVE_NOFOLLOW_ANY is set for a "/.nofollow/" prefix, or when the
 * number in a "/.resolve/<number>/" prefix has this bit set; namei()
 * then starts ni_loopcnt at MAXSYMLINKS.
 * RESOLVE_CHECKED is always set by the helper and records that the
 * prefix check has been performed.
 */
130 #define RESOLVE_NOFOLLOW_ANY 0x00000001
131 #define RESOLVE_CHECKED 0x80000000
132 static int lookup_check_for_resolve_prefix(char *path, size_t pathbuflen, size_t len, uint32_t *resolve_flags, size_t *prefix_len);
133
134 /*
135 * Convert a pathname into a pointer to a locked inode.
136 *
137 * The FOLLOW flag is set when symbolic links are to be followed
138 * when they occur at the end of the name translation process.
139 * Symbolic links are always followed for all other pathname
140 * components other than the last.
141 *
142 * The segflg defines whether the name is to be copied from user
143 * space or kernel space.
144 *
145 * Overall outline of namei:
146 *
147 * copy in name
148 * get starting directory
149 * while (!done && !error) {
150 * call lookup to search path.
151 * if symbolic link, massage name in buffer and continue
152 * }
153 *
154 * Returns: 0 Success
155 * ENOENT No such file or directory
156 * ELOOP Too many levels of symbolic links
157 * ENAMETOOLONG Filename too long
158 * copyinstr:EFAULT Bad address
159 * copyinstr:ENAMETOOLONG Filename too long
160 * lookup:EBADF Bad file descriptor
161 * lookup:EROFS
162 * lookup:EACCES
163 * lookup:EPERM
164 * lookup:ERECYCLE vnode was recycled from underneath us in lookup.
165 * This means we should re-drive lookup from this point.
166 * lookup: ???
167 * VNOP_READLINK:???
168 */
169 int
namei(struct nameidata * ndp)170 namei(struct nameidata *ndp)
171 {
172 struct vnode *dp; /* the directory we are searching */
173 struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to
174 * heavy vnode pressure */
175 uint32_t cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
176 int error;
177 struct componentname *cnp = &ndp->ni_cnd;
178 vfs_context_t ctx = cnp->cn_context;
179 proc_t p = vfs_context_proc(ctx);
180 #if CONFIG_AUDIT
181 /* XXX ut should be from context */
182 uthread_t ut = current_uthread();
183 #endif
184
185 #if CONFIG_VOLFS
/* Number of ENOENT re-drives performed so far for a translated volfs path. */
186 int volfs_restarts = 0;
187 #endif
188 size_t bytes_copied = 0;
189 size_t resolve_prefix_len = 0;
/* Directories on which this function holds its own usecount; released on every exit below. */
190 vnode_t rootdir_with_usecount = NULLVP;
191 vnode_t startdir_with_usecount = NULLVP;
/* Caller-supplied USEDVP start directory; this function takes no usecount on it. */
192 vnode_t usedvp_dp = NULLVP;
193 int32_t old_count = 0;
194 uint32_t resolve_flags = 0;
195 int resolve_error = 0;
196 bool dp_has_iocount = false;
/* True when ni_usedvp was filled in here (RESOLVE_BENEATH starting at the cwd) and must be cleared on error. */
197 bool clear_usedvp = false;
198
199 #if DIAGNOSTIC
200 if (!vfs_context_ucred(ctx) || !p) {
201 panic("namei: bad cred/proc");
202 }
203 if (cnp->cn_nameiop & (~OPMASK)) {
204 panic("namei: nameiop contaminated with flags");
205 }
206 if (cnp->cn_flags & OPMASK) {
207 panic("namei: flags contaminated with nameiops");
208 }
209 #endif
210
211 /*
212 * A compound VNOP found something that needs further processing:
213 * either a trigger vnode, a covered directory, or a symlink.
214 */
215 if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
216 int rdonly, vbusyflags, keep_going, wantparent;
217
218 rdonly = cnp->cn_flags & RDONLY;
219 vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
220 keep_going = 0;
221 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
222
223 ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);
224
225 error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags,
226 &keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
227 if (error) {
228 goto out_drop;
229 }
/* On a continued lookup the only accepted reason to keep going is a symlink. */
230 if (keep_going) {
231 if ((cnp->cn_flags & ISSYMLINK) == 0) {
232 panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
233 }
234 goto continue_symlink;
235 }
236
237 return 0;
238 }
239
/*
 * Restart point. Control comes back here from the bottom of the function
 * on ERECYCLE and, under CONFIG_VOLFS, on ENOENT for a translated volfs path.
 */
240 vnode_recycled:
241
242 /*
243 * Get a buffer for the name to be translated, and copy the
244 * name into the buffer.
245 */
246 if ((cnp->cn_flags & HASBUF) == 0) {
247 cnp->cn_pnbuf = ndp->ni_pathbuf;
248 cnp->cn_pnlen = PATHBUFLEN;
249 }
250
/*
 * Copy the path in. On ENAMETOOLONG the buffer is grown and the copy is
 * retried: first from the inline ni_pathbuf (PATHBUFLEN) to a ZV_NAMEI
 * buffer (MAXPATHLEN), then by doubling with kalloc_data() while the
 * doubled size stays within MAXLONGPATHLEN and proc_support_long_paths(p)
 * allows it.
 */
251 retry_copy:
252 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
253 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
254 cnp->cn_pnlen, &bytes_copied);
255 } else {
256 error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
257 cnp->cn_pnlen, &bytes_copied);
258 }
259 if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
/*
 * The inline buffer was filled completely. Inspect the head of the path
 * for a resolve prefix now; if one is found, advance the source pointer
 * past it so that the retried copy starts after the prefix.
 */
260 if (bytes_copied == PATHBUFLEN) {
261 resolve_error = lookup_check_for_resolve_prefix(cnp->cn_pnbuf, PATHBUFLEN,
262 PATHBUFLEN, &resolve_flags, &resolve_prefix_len);
263 /* errors from copyinstr take precedence over resolve_error */
264 if (!resolve_error && resolve_prefix_len) {
265 ndp->ni_dirp += resolve_prefix_len;
266 resolve_prefix_len = 0;
267 }
268 }
269
270 cnp->cn_pnbuf = zalloc(ZV_NAMEI);
271 cnp->cn_flags |= HASBUF;
272 cnp->cn_pnlen = MAXPATHLEN;
273 bytes_copied = 0;
274
275 goto retry_copy;
276 } else if (error == ENAMETOOLONG && (cnp->cn_flags & HASBUF) &&
277 (cnp->cn_pnlen * 2) <= MAXLONGPATHLEN && proc_support_long_paths(p)) {
278 if (cnp->cn_pnlen == MAXPATHLEN) {
279 /* First time we arrive here, the buffer came from ZV_NAMEI */
280 zfree(ZV_NAMEI, cnp->cn_pnbuf);
281 } else {
282 kfree_data(cnp->cn_pnbuf, cnp->cn_pnlen);
283 }
284
/* Discard any error recorded by the earlier prefix check before retrying with a larger buffer. */
285 resolve_error = 0;
286
287 cnp->cn_pnlen *= 2;
288 cnp->cn_pnbuf = kalloc_data(cnp->cn_pnlen, Z_WAITOK | Z_ZERO | Z_NOFAIL);
289 bytes_copied = 0;
290
291 goto retry_copy;
292 }
293 if (error) {
294 goto error_out;
295 } else if (resolve_error) {
296 error = resolve_error;
297 goto error_out;
298 }
299 assert(bytes_copied <= cnp->cn_pnlen);
300 ndp->ni_pathlen = (u_int)bytes_copied;
301 bytes_copied = 0;
302
/*
 * The prefix has not been examined yet, which per the assert below means
 * the path fit in the inline buffer on the first copy.
 * NOTE(review): resolve_flags is a local that is never cleared, so when
 * control returns through vnode_recycled this block is skipped even though
 * cn_pnbuf/cn_pnlen may have been reset to the inline buffer above --
 * confirm that a short path with a resolve prefix is still stripped
 * correctly on a re-drive.
 */
303 if (!(resolve_flags & RESOLVE_CHECKED)) {
304 assert(!(cnp->cn_flags & HASBUF) && (cnp->cn_pnlen == PATHBUFLEN));
305 error = lookup_check_for_resolve_prefix(cnp->cn_pnbuf, cnp->cn_pnlen, ndp->ni_pathlen,
306 &resolve_flags, &resolve_prefix_len);
307 if (error) {
308 goto error_out;
309 }
310 if (resolve_prefix_len) {
311 /*
312 * Since this is pointing to the static path buffer instead of a zalloc'ed memory,
313 * we're not going to attempt to free this, so it is perfectly fine to change the
314 * value of cnp->cn_pnbuf.
315 */
316 cnp->cn_pnbuf += resolve_prefix_len;
317 cnp->cn_pnlen -= resolve_prefix_len;
318 ndp->ni_pathlen -= resolve_prefix_len;
319 resolve_prefix_len = 0;
320 }
321 }
322
323 /* At this point we should have stripped off the prefix from the path that has to be looked up */
324 assert((resolve_flags & RESOLVE_CHECKED) && (resolve_prefix_len == 0));
325
326 /*
327 * Since the name cache may contain positive entries of
328 * the incorrect case, force lookup() to bypass the cache
329 * and call directly into the filesystem for each path
330 * component. Note: the FS may still consult the cache,
331 * but can apply rules to validate the results.
332 */
333 if (proc_is_forcing_hfs_case_sensitivity(p)) {
334 cnp->cn_flags |= CN_SKIPNAMECACHE;
335 }
336
337 #if CONFIG_VOLFS
338 /*
339 * Check for legacy volfs style pathnames.
340 *
341 * For compatibility reasons we currently allow these paths,
342 * but future versions of the OS may not support them.
343 */
344 if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
345 cnp->cn_pnbuf[0] == '/' &&
346 cnp->cn_pnbuf[1] == '.' &&
347 cnp->cn_pnbuf[2] == 'v' &&
348 cnp->cn_pnbuf[3] == 'o' &&
349 cnp->cn_pnbuf[4] == 'l' &&
350 cnp->cn_pnbuf[5] == '/') {
351 char * realpath;
352 size_t realpathlen;
353 int realpath_err;
354 /* Attempt to resolve a legacy volfs style pathname. */
355
/*
 * Start with a MAXPATHLEN buffer from ZV_NAMEI. Each ENAMETOOLONG retry
 * doubles realpathlen and allocates with kalloc_data() instead.
 */
356 realpathlen = MAXPATHLEN;
357 do {
358 if (realpathlen == MAXPATHLEN) {
359 realpath = zalloc(ZV_NAMEI);
360 } else {
361 /*
362 * To be consistent with the behavior of openbyid_np, which always supports
363 * long paths, do not gate our support on proc_support_long_paths either.
364 */
365 realpath = kalloc_data(realpathlen, Z_WAITOK | Z_ZERO | Z_NOFAIL);
366 }
367 /*
368 * We only error out on the ENAMETOOLONG cases where we know that
369 * vfs_getrealpath translation succeeded but the path could not fit into
370 * realpathlen characters. In other failure cases, we may be dealing with a path
371 * that legitimately looks like /.vol/1234/567 and is not meant to be translated
372 */
373 if ((realpath_err = vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, realpathlen, ctx))) {
374 if (realpathlen == MAXPATHLEN) {
375 zfree(ZV_NAMEI, realpath);
376 } else {
377 kfree_data(realpath, realpathlen);
378 }
379 if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG) {
380 error = ENAMETOOLONG;
381 }
382 } else {
383 size_t tmp_len;
/* Translation succeeded: release the old out-of-line path buffer, if any, and adopt realpath as the path buffer. */
384 if (cnp->cn_flags & HASBUF) {
385 if (cnp->cn_pnlen == MAXPATHLEN) {
386 zfree(ZV_NAMEI, cnp->cn_pnbuf);
387 } else {
388 kfree_data(cnp->cn_pnbuf, cnp->cn_pnlen);
389 }
390 }
391 cnp->cn_pnbuf = realpath;
392 cnp->cn_pnlen = (int)realpathlen;
393 tmp_len = strlen(realpath) + 1;
394 assert(tmp_len <= UINT_MAX);
395 ndp->ni_pathlen = (u_int)tmp_len;
396 cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
397 error = 0;
398 }
/* Retry with a doubled buffer only while the result did not fit and the new size stays within MAXLONGPATHLEN. */
399 } while (error == ENAMETOOLONG && (realpathlen *= 2) && realpathlen <= MAXLONGPATHLEN);
400
401 if (error) {
402 goto error_out;
403 }
404 }
405 #endif /* CONFIG_VOLFS */
406
407 #if CONFIG_AUDIT
408 /* If we are auditing the kernel pathname, save the user pathname */
409 if (cnp->cn_flags & AUDITVNPATH1) {
410 AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1);
411 }
412 if (cnp->cn_flags & AUDITVNPATH2) {
413 AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2);
414 }
415 #endif /* CONFIG_AUDIT */
416
417 /*
418 * Do not allow empty pathnames
419 */
420 if (*cnp->cn_pnbuf == '\0') {
421 error = ENOENT;
422 goto error_out;
423 }
/*
 * When no symlink may be followed, start the symlink counter at its limit.
 * NOTE(review): presumably this makes the first symlink traversal fail in
 * lookup_handle_symlink(); that helper is not visible here -- confirm.
 */
424 if (ndp->ni_flag & NAMEI_NOFOLLOW_ANY || (resolve_flags & RESOLVE_NOFOLLOW_ANY)) {
425 ndp->ni_loopcnt = MAXSYMLINKS;
426 } else {
427 ndp->ni_loopcnt = 0;
428 }
429
430 /*
431 * determine the starting point for the translation.
432 */
433 proc_dirs_lock_shared(p);
434 lck_rw_lock_shared(&rootvnode_rw_lock);
435
/* Unless the caller supplied a root (NAMEI_ROOTDIR), use the chroot directory if there is one, else the global rootvnode. */
436 if (!(ndp->ni_flag & NAMEI_ROOTDIR)) {
437 if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
438 ndp->ni_rootdir = p->p_fd.fd_rdir;
439 } else {
440 ndp->ni_rootdir = rootvnode;
441 }
442 }
443
444 if (!ndp->ni_rootdir) {
445 if (ndp->ni_flag & NAMEI_ROOTDIR) {
446 panic("NAMEI_ROOTDIR is set but ni_rootdir is not\n");
447 } else if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
448 /* This should be a panic */
449 printf("p->p_fd.fd_rdir is not set\n");
450 } else {
451 printf("rootvnode is not set\n");
452 }
453 lck_rw_unlock_shared(&rootvnode_rw_lock);
454 proc_dirs_unlock_shared(p);
455 error = ENOENT;
456 goto error_out;
457 }
458
459 cnp->cn_nameptr = cnp->cn_pnbuf;
460
461 ndp->ni_usedvp = NULLVP;
462
/*
 * Pick the start directory:
 *  - absolute path: skip the leading slashes and start at ni_rootdir;
 *  - USEDVP: start at the caller-supplied ni_dvp;
 *  - otherwise: start at the current working directory of the context.
 */
463 if (*(cnp->cn_nameptr) == '/') {
464 while (*(cnp->cn_nameptr) == '/') {
465 cnp->cn_nameptr++;
466 ndp->ni_pathlen--;
467 }
468 if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
469 /* Absolute paths are never allowed in NAMEI_RESOLVE_BENEATH */
470 lck_rw_unlock_shared(&rootvnode_rw_lock);
471 proc_dirs_unlock_shared(p);
472 error = EACCES;
473 goto error_out;
474 }
475 dp = ndp->ni_rootdir;
476 } else if (cnp->cn_flags & USEDVP) {
477 dp = ndp->ni_dvp;
478 ndp->ni_usedvp = dp;
479 usedvp_dp = dp;
480 } else {
481 dp = vfs_context_cwd(ctx);
482 if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
483 /* Store the starting directory because it can change after a symlink traversal */
484 ndp->ni_usedvp = dp;
485 clear_usedvp = true;
486 }
487 }
488
489 if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
490 dp = NULLVP;
491 lck_rw_unlock_shared(&rootvnode_rw_lock);
492 proc_dirs_unlock_shared(p);
493 error = ENOENT;
494 goto error_out;
495 }
496
497 /*
498 * We need our own usecount on the root vnode and the starting dir across
499 * the lookup. There are two things that can be done here. We can hold the locks
500 * (which protect the existing usecounts on the directories) across the
501 * lookup or take our own usecount. Holding the locks across the lookup can
502 * cause deadlock issues if we re-enter namei on the same thread so the
503 * correct thing to do is to acquire our own usecount.
504 *
505 * Ideally, the usecount should be obtained by vnode_get->vnode_ref->vnode_put.
506 * However when this vnode is the rootvnode, that sequence will produce a
507 * lot of vnode mutex locks and unlocks on a single vnode (the rootvnode)
508 * and will be highly contended and degrade performance. Since we have
509 * an existing usecount protected by the locks we hold, we'll just use
510 * an atomic op to increment the usecount on a vnode which already has one
511 * and can't be released because we have the locks which protect against that
512 * happening.
513 */
514 rootdir_with_usecount = ndp->ni_rootdir;
515 old_count = os_atomic_inc_orig(&rootdir_with_usecount->v_usecount, relaxed);
516 if (old_count < 1) {
517 panic("(1) invalid pre-increment usecount (%d) for rootdir vnode %p",
518 old_count, rootdir_with_usecount);
519 } else if (old_count == INT32_MAX) {
520 panic("(1) usecount overflow for vnode %p", rootdir_with_usecount);
521 }
522
/*
 * The start directory gets its own usecount unless it is the root
 * directory (already counted above) or the caller-supplied USEDVP directory.
 */
523 if ((dp != rootdir_with_usecount) && (dp != usedvp_dp)) {
524 old_count = os_atomic_inc_orig(&dp->v_usecount, relaxed);
525 if (old_count < 1) {
526 panic("(2) invalid pre-increment usecount (%d) for vnode %p", old_count, dp);
527 } else if (old_count == INT32_MAX) {
528 panic("(2) usecount overflow for vnode %p", dp);
529 }
530 startdir_with_usecount = dp;
531 }
532
533 /* Now that we have our usecount, release the locks */
534 lck_rw_unlock_shared(&rootvnode_rw_lock);
535 proc_dirs_unlock_shared(p);
536
537 ndp->ni_dvp = NULLVP;
538 ndp->ni_vp = NULLVP;
539
/*
 * Main loop: one lookup() per pass. The loop exits by returning 0 when the
 * result is not a symlink, or by jumping/breaking to the error paths;
 * otherwise the symlink is expanded and the next pass looks up the new path.
 */
540 for (;;) {
541 #if CONFIG_MACF
542 /*
543 * Give MACF policies a chance to reject the lookup
544 * before performing any filesystem operations.
545 * This hook is called before resolving the path and
546 * again each time a symlink is encountered.
547 * NB: policies receive path information as supplied
548 * by the caller and thus cannot be trusted.
549 */
550 error = mac_vnode_check_lookup_preflight(ctx, dp, cnp->cn_nameptr, cnp->cn_namelen);
551 if (error) {
552 goto error_out;
553 }
554 #endif
/* The start directory is handed to lookup() through ni_startdir. */
555 ndp->ni_startdir = dp;
556 dp = NULLVP;
557
558 if ((error = lookup(ndp))) {
559 goto error_out;
560 }
561
562 /*
563 * Check for symbolic link
564 */
565 if ((cnp->cn_flags & ISSYMLINK) == 0) {
/* Success: release the usecounts taken above and return. The path buffer is not freed on this path. */
566 if (startdir_with_usecount) {
567 vnode_rele(startdir_with_usecount);
568 startdir_with_usecount = NULLVP;
569 }
570 if (rootdir_with_usecount) {
/*
 * If the counted root directory is still the global rootvnode, undo the
 * atomic increment under the lock; otherwise release it with vnode_rele()
 * after dropping the lock.
 */
571 lck_rw_lock_shared(&rootvnode_rw_lock);
572 if (rootdir_with_usecount == rootvnode) {
573 old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
574 if (old_count < 2) {
575 /*
576 * There needs to have been at least 1 usecount left on the rootvnode
577 */
578 panic("(3) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
579 old_count, rootdir_with_usecount);
580 }
581 rootdir_with_usecount = NULLVP;
582 }
583 lck_rw_unlock_shared(&rootvnode_rw_lock);
584 if (rootdir_with_usecount) {
585 vnode_rele(rootdir_with_usecount);
586 rootdir_with_usecount = NULLVP;
587 }
588 }
589
590 return 0;
591 }
592
/* Also entered directly from the NAMEI_CONTLOOKUP path at the top of the function. */
593 continue_symlink:
594 /* Gives us a new path to process, and a starting dir */
595 error = lookup_handle_symlink(ndp, &dp, &dp_has_iocount, ctx);
596 if (error != 0) {
597 break;
598 }
/*
 * The new start directory came back holding an iocount. If it is not a
 * directory already covered (root, current start dir, USEDVP dir), replace
 * the start-dir usecount with one on the new directory, then drop the iocount.
 */
599 if (dp_has_iocount) {
600 if ((dp != rootdir_with_usecount) && (dp != startdir_with_usecount) &&
601 (dp != usedvp_dp)) {
602 if (startdir_with_usecount) {
603 vnode_rele(startdir_with_usecount);
604 }
605 vnode_ref_ext(dp, 0, VNODE_REF_FORCE);
606 startdir_with_usecount = dp;
607 }
608 vnode_put(dp);
609 dp_has_iocount = false;
610 }
611 }
612 /*
613 * only come here if we fail to handle a SYMLINK...
614 * if either ni_dvp or ni_vp is non-NULL, then
615 * we need to drop the iocount that was picked
616 * up in the lookup routine
617 */
618 out_drop:
619 if (ndp->ni_dvp) {
620 vnode_put(ndp->ni_dvp);
621 }
622 if (ndp->ni_vp) {
623 vnode_put(ndp->ni_vp);
624 }
/*
 * Common failure exit: release the usecounts taken above, free any
 * out-of-line path buffer, clear the result vnodes, then decide whether
 * the lookup should be re-driven.
 */
625 error_out:
626 if (clear_usedvp) {
627 ndp->ni_usedvp = NULLVP;
628 }
629 if (startdir_with_usecount) {
630 vnode_rele(startdir_with_usecount);
631 startdir_with_usecount = NULLVP;
632 }
633 if (rootdir_with_usecount) {
634 lck_rw_lock_shared(&rootvnode_rw_lock);
635 if (rootdir_with_usecount == rootvnode) {
636 old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
637 if (old_count < 2) {
638 /*
639 * There needs to have been at least 1 usecount left on the rootvnode
640 */
641 panic("(4) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
642 old_count, rootdir_with_usecount);
643 }
644 lck_rw_unlock_shared(&rootvnode_rw_lock);
645 } else {
646 lck_rw_unlock_shared(&rootvnode_rw_lock);
647 vnode_rele(rootdir_with_usecount);
648 }
649 rootdir_with_usecount = NULLVP;
650 }
651
/*
 * cn_pnlen tells which allocator the buffer came from: MAXPATHLEN means
 * the ZV_NAMEI zone, any other size means kalloc_data().
 */
652 if ((cnp->cn_flags & HASBUF)) {
653 cnp->cn_flags &= ~HASBUF;
654 if (cnp->cn_pnlen == MAXPATHLEN) {
655 zfree(ZV_NAMEI, cnp->cn_pnbuf);
656 } else {
657 kfree_data(cnp->cn_pnbuf, cnp->cn_pnlen);
658 }
659 }
660 cnp->cn_pnbuf = NULL;
661 ndp->ni_vp = NULLVP;
662 ndp->ni_dvp = NULLVP;
663
664 #if CONFIG_VOLFS
665 /*
666 * Deal with volfs fallout.
667 *
668 * At this point, if we were originally given a volfs path that
669 * looks like /.vol/123/456, then we would have had to convert it into
670 * a full path. Assuming that part worked properly, we will now attempt
671 * to conduct a lookup of the item in the namespace. Under normal
672 * circumstances, if a user looked up /tmp/foo and it was not there, it
673 * would be permissible to return ENOENT.
674 *
675 * However, we may not want to do that here. Specifically, the volfs path
676 * uniquely identifies a certain item in the namespace regardless of where it
677 * lives. If the item has moved in between the time we constructed the
678 * path and now, when we're trying to do a lookup/authorization on the full
679 * path, we may have gotten an ENOENT.
680 *
681 * At this point we can no longer tell if the path no longer exists
682 * or if the item in question no longer exists. It could have been renamed
683 * away, in which case the /.vol identifier is still valid.
684 *
685 * Do this dance a maximum of MAX_VOLFS_RESTARTS times.
686 */
687 if ((error == ENOENT) && (ndp->ni_cnd.cn_flags & CN_VOLFSPATH)) {
688 if (volfs_restarts < MAX_VOLFS_RESTARTS) {
689 volfs_restarts++;
690 goto vnode_recycled;
691 }
692 }
693 #endif
694
695 if (error == ERECYCLE) {
696 /* vnode was recycled underneath us. re-drive lookup to start at
697 * the beginning again, since recycling invalidated last lookup*/
/* Restore the flags and ni_dvp that were saved on entry before starting over. */
698 ndp->ni_cnd.cn_flags = cnpflags;
699 ndp->ni_dvp = usedvp;
700 goto vnode_recycled;
701 }
702
703
704 return error;
705 }
706
707 int
namei_compound_available(vnode_t dp,struct nameidata * ndp)708 namei_compound_available(vnode_t dp, struct nameidata *ndp)
709 {
710 if ((ndp->ni_flag & NAMEI_COMPOUNDOPEN) != 0) {
711 return vnode_compound_open_available(dp);
712 }
713
714 return 0;
715 }
716
717 static int
lookup_check_for_resolve_prefix(char * path,size_t pathbuflen,size_t len,uint32_t * resolve_flags,size_t * prefix_len)718 lookup_check_for_resolve_prefix(char *path, size_t pathbuflen, size_t len, uint32_t *resolve_flags, size_t *prefix_len)
719 {
720 int error = 0;
721 *resolve_flags = (uint32_t)RESOLVE_CHECKED;
722 *prefix_len = 0;
723
724 if (len < (sizeof("/.nofollow/") - 1) || path[0] != '/' || path[1] != '.') {
725 return 0;
726 }
727
728 if ((strncmp(&path[2], "nofollow/", (sizeof("nofollow/") - 1)) == 0)) {
729 *resolve_flags |= RESOLVE_NOFOLLOW_ANY;
730 *prefix_len = sizeof("/.nofollow") - 1;
731 } else if ((len >= sizeof("/.resolve/1/") - 1) &&
732 strncmp(&path[2], "resolve/", (sizeof("resolve/") - 1)) == 0) {
733 char * flag = path + (sizeof("/.resolve/") - 1);
734 char *next = flag;
735 char last_char = path[pathbuflen - 1];
736
737 /* no leading zeroes or non digits */
738 if ((flag[0] == '0' && flag[1] != '/') ||
739 flag[0] < '0' || flag[0] > '9') {
740 error = EINVAL;
741 goto out;
742 }
743
744 path[pathbuflen - 1] = '\0';
745 unsigned long flag_val = strtoul(flag, &next, 10);
746 path[pathbuflen - 1] = last_char;
747 if (next[0] != '/' || (flag_val & ~(RESOLVE_NOFOLLOW_ANY))) {
748 error = EINVAL;
749 goto out;
750 }
751 assert(next >= flag);
752 *resolve_flags |= (uint32_t)flag_val;
753 *prefix_len = (size_t)(next - path);
754 }
755 out:
756 assert(*prefix_len <= sizeof("/.resolve/2147483647"));
757 return error;
758 }
759
760 static int
lookup_authorize_search(vnode_t dp,struct componentname * cnp,int dp_authorized_in_cache,vfs_context_t ctx)761 lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx)
762 {
763 #if !CONFIG_MACF
764 #pragma unused(cnp)
765 #endif
766
767 int error;
768
769 if (!dp_authorized_in_cache) {
770 error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx);
771 if (error) {
772 return error;
773 }
774 }
775 #if CONFIG_MACF
776 error = mac_vnode_check_lookup(ctx, dp, cnp);
777 if (error) {
778 return error;
779 }
780 #endif /* CONFIG_MACF */
781
782 return 0;
783 }
784
785 static void
lookup_consider_update_cache(vnode_t dvp,vnode_t vp,struct componentname * cnp,int nc_generation)786 lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation)
787 {
788 int isdot_or_dotdot;
789 isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT);
790
791 if (vp->v_name == NULL || vp->v_parent == NULLVP) {
792 int update_flags = 0;
793
794 if (isdot_or_dotdot == 0) {
795 if (vp->v_name == NULL) {
796 update_flags |= VNODE_UPDATE_NAME;
797 }
798 if (dvp != NULLVP && vp->v_parent == NULLVP) {
799 update_flags |= VNODE_UPDATE_PARENT;
800 }
801
802 if (update_flags) {
803 vnode_update_identity(vp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags);
804 }
805 }
806 }
807 if ((cnp->cn_flags & MAKEENTRY) && (vp->v_flag & VNCACHEABLE) && LIST_FIRST(&vp->v_nclinks) == NULL) {
808 /*
809 * missing from name cache, but should
810 * be in it... this can happen if volfs
811 * causes the vnode to be created or the
812 * name cache entry got recycled but the
813 * vnode didn't...
814 * check to make sure that ni_dvp is valid
815 * cache_lookup_path may return a NULL
816 * do a quick check to see if the generation of the
817 * directory matches our snapshot... this will get
818 * rechecked behind the name cache lock, but if it
819 * already fails to match, no need to go any further
820 */
821 if (dvp != NULLVP && (nc_generation == dvp->v_nc_generation) && (!isdot_or_dotdot)) {
822 cache_enter_with_gen(dvp, vp, cnp, nc_generation);
823 }
824 }
825 }
826
827 #if NAMEDRSRCFORK
828 /*
829 * Can change ni_dvp and ni_vp. On success, returns with iocounts on stream vnode (always) and
830 * data fork if requested. On failure, returns with iocount data fork (always) and its parent directory
831 * (if one was provided).
832 */
833 static int
lookup_handle_rsrc_fork(vnode_t dp,struct nameidata * ndp,struct componentname * cnp,int wantparent,vfs_context_t ctx)834 lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx)
835 {
836 vnode_t svp = NULLVP;
837 enum nsoperation nsop;
838 int nsflags;
839 int error;
840
841 if (dp->v_type != VREG) {
842 error = ENOENT;
843 goto out;
844 }
845 switch (cnp->cn_nameiop) {
846 case DELETE:
847 if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
848 nsop = NS_DELETE;
849 } else {
850 error = EPERM;
851 goto out;
852 }
853 break;
854 case CREATE:
855 if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
856 nsop = NS_CREATE;
857 } else {
858 error = EPERM;
859 goto out;
860 }
861 break;
862 case LOOKUP:
863 /* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
864 if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
865 nsop = NS_OPEN;
866 } else {
867 error = EPERM;
868 goto out;
869 }
870 break;
871 default:
872 error = EPERM;
873 goto out;
874 }
875
876 nsflags = 0;
877 if (cnp->cn_flags & CN_RAW_ENCRYPTED) {
878 nsflags |= NS_GETRAWENCRYPTED;
879 }
880
881 /* Ask the file system for the resource fork. */
882 error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, nsflags, ctx);
883
884 /* During a create, it OK for stream vnode to be missing. */
885 if (error == ENOATTR || error == ENOENT) {
886 error = (nsop == NS_CREATE) ? 0 : ENOENT;
887 }
888 if (error) {
889 goto out;
890 }
891 /* The "parent" of the stream is the file. */
892 if (wantparent) {
893 if (ndp->ni_dvp) {
894 vnode_put(ndp->ni_dvp);
895 }
896 ndp->ni_dvp = dp;
897 } else {
898 vnode_put(dp);
899 }
900 ndp->ni_vp = svp; /* on create this may be null */
901
902 /* Restore the truncated pathname buffer (for audits). */
903 if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') {
904 /*
905 * While we replaced only '/' with '\0' and would ordinarily
906 * need to just switch that back, the buffer in which we did
907 * this may not be what the pathname buffer is now when symlinks
908 * are involved. If we just restore the "/" we will make the
909 * string not terminated anymore, so be safe and restore the
910 * entire suffix.
911 */
912 strncpy(ndp->ni_next, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC));
913 cnp->cn_nameptr = ndp->ni_next + 1;
914 cnp->cn_namelen = sizeof(_PATH_RSRCFORKSPEC) - 1;
915 ndp->ni_next += cnp->cn_namelen;
916 if (ndp->ni_next[0] != '\0') {
917 panic("Incorrect termination of path in %s", __FUNCTION__);
918 }
919 }
920 cnp->cn_flags &= ~MAKEENTRY;
921
922 return 0;
923 out:
924 return error;
925 }
926 #endif /* NAMEDRSRCFORK */
927
/*
 * lookup_handle_found_vnode
 *
 * Post-process a vnode that lookup() has just found for the current path
 * component (ndp->ni_vp) and decide what lookup() should do next.
 *
 * Parameters:
 *	ndp		nameidata for the lookup in progress; ni_vp must be
 *			non-NULL on entry (the function panics otherwise).
 *	cnp		componentname for the lookup in progress.
 *	rdonly		non-zero: DELETE and RENAME on the final component
 *			fail with EROFS.
 *	vbusyflags	passed through to lookup_traverse_mountpoints().
 *	keep_going	out: set to 1 if the caller has more work to do for
 *			this path (a symlink that must be interpreted, or
 *			another component to look up); 0 otherwise.
 *	nc_generation	passed through to lookup_consider_update_cache().
 *	wantparent	zero: ni_dvp is released and cleared once the final
 *			component has been reached.
 *	atroot		non-zero: skip straight to the "nextname" handling,
 *			bypassing cn_consume, mount point traversal, MAC
 *			labeling, and the symlink/trailing-slash/shadow checks.
 *	ctx		VFS context for the operations performed here.
 *
 * Returns:	0 on success, otherwise an errno value: ENOTDIR (trailing
 *		slash on a non-directory), ENOENT (shadow file, or failure
 *		to take the SAVESTART iocount), EROFS, or whatever
 *		lookup_traverse_mountpoints(), vnode_label(),
 *		lookup_handle_rsrc_fork() or lookup_handle_emptyname() return.
 *
 * iocounts in:
 *      --One on ni_vp.  One on ni_dvp if there is more path, or we didn't come through the
 *      cache, or we came through the cache and the caller doesn't want the parent.
 *
 * iocounts out:
 *	--Leaves us in the correct state for the next step, whatever that might be.
 *	--If we find a symlink, returns with iocounts on both ni_vp and ni_dvp.
 *	--If we are to look up another component, then we have an iocount on ni_vp and
 *	nothing else.
 *	--If we are done, returns an iocount on ni_vp, and possibly on ni_dvp depending on nameidata flags.
 *	--In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount
 *	was dropped).
 */
static int
lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
    int vbusyflags, int *keep_going, int nc_generation,
    int wantparent, int atroot, vfs_context_t ctx)
{
	vnode_t dp;
	int error;
	char *cp;

	dp = ndp->ni_vp;
	*keep_going = 0;

	if (ndp->ni_vp == NULLVP) {
		panic("NULL ni_vp in %s", __FUNCTION__);
	}

	/* The caller already resolved ".." at the root; nothing to validate. */
	if (atroot) {
		goto nextname;
	}

	/*
	 * Take into account any additional components consumed by
	 * the underlying filesystem.
	 */
	if (cnp->cn_consume > 0) {
		cnp->cn_nameptr += cnp->cn_consume;
		ndp->ni_next += cnp->cn_consume;
		ndp->ni_pathlen -= cnp->cn_consume;
		cnp->cn_consume = 0;
	} else {
		/* Only considered when the filesystem consumed nothing extra. */
		lookup_consider_update_cache(ndp->ni_dvp, dp, cnp, nc_generation);
	}

	/*
	 * Check to see if the vnode has been mounted on...
	 * if so find the root of the mounted file system.
	 * Updates ndp->ni_vp.
	 */
	error = lookup_traverse_mountpoints(ndp, cnp, dp, vbusyflags, ctx);
	/* Re-read ni_vp even on error: the traversal may have replaced it. */
	dp = ndp->ni_vp;
	if (error) {
		goto out;
	}

#if CONFIG_MACF
	/* Label the vnode when its mount is flagged MNT_MULTILABEL. */
	if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) {
		error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx);
		if (error) {
			goto out;
		}
	}
#endif

	/*
	 * Check for symbolic link
	 *
	 * The link is handed back to the caller (ISSYMLINK set, *keep_going = 1)
	 * when FOLLOW is requested, when the path had a trailing slash, or
	 * when more path follows this component.
	 */
	if ((dp->v_type == VLNK) &&
	    ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
		cnp->cn_flags |= ISSYMLINK;
		*keep_going = 1;
		return 0;
	}

	/*
	 * Check for bogus trailing slashes.
	 */
	if ((ndp->ni_flag & NAMEI_TRAILINGSLASH)) {
		if (dp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		}
		ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
	}

#if NAMEDSTREAMS
	/*
	 * Deny namei/lookup requests to resolve paths that point to shadow files.
	 * Access to shadow files must be conducted by explicit calls to VNOP_LOOKUP
	 * directly, and not use lookup/namei
	 */
	if (vnode_isshadow(dp)) {
		error = ENOENT;
		goto out;
	}
#endif

nextname:
	/*
	 * Not a symbolic link.  If more pathname,
	 * continue at next component, else return.
	 *
	 * Definitely have a dvp if there's another slash
	 */
	if (*ndp->ni_next == '/') {
		/* Step over the separator and any run of additional slashes. */
		cnp->cn_nameptr = ndp->ni_next + 1;
		ndp->ni_pathlen--;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}

		cp = cnp->cn_nameptr;
		/* The parent is no longer needed once we descend into ni_vp. */
		vnode_put(ndp->ni_dvp);
		ndp->ni_dvp = NULLVP;

		/* Nothing but slashes remained: treat as a degenerate name. */
		if (*cp == '\0') {
			goto emptyname;
		}

		*keep_going = 1;
		return 0;
	}

	/*
	 * Disallow directory write attempts on read-only file systems.
	 */
	if (rdonly &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto out;
	}

	/* If SAVESTART is set, we should have a dvp */
	if (cnp->cn_flags & SAVESTART) {
		/*
		 * note that we already hold a reference
		 * on both dp and ni_dvp, but for some reason
		 * can't get another one... in this case we
		 * need to do vnode_put on dp in 'bad2'
		 * (the 'bad2' label lives in the caller, lookup())
		 */
		if ((vnode_get(ndp->ni_dvp))) {
			error = ENOENT;
			goto out;
		}
		ndp->ni_startdir = ndp->ni_dvp;
	}
	if (!wantparent && ndp->ni_dvp) {
		vnode_put(ndp->ni_dvp);
		ndp->ni_dvp = NULLVP;
	}

	if (cnp->cn_flags & AUDITVNPATH1) {
		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
	} else if (cnp->cn_flags & AUDITVNPATH2) {
		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
	}

#if NAMEDRSRCFORK
	/*
	 * Caller wants the resource fork.
	 */
	if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) {
		error = lookup_handle_rsrc_fork(dp, ndp, cnp, wantparent, ctx);
		if (error != 0) {
			goto out;
		}

		/* ni_vp now refers to the stream vnode (may be NULL on create). */
		dp = ndp->ni_vp;
	}
#endif
	if (kdebug_enable) {
		kdebug_lookup(ndp->ni_vp, cnp);
	}

	return 0;

emptyname:
	error = lookup_handle_emptyname(ndp, cnp, wantparent);
	if (error != 0) {
		goto out;
	}

	return 0;
out:
	return error;
}
1118
1119 /*
1120 * Comes in iocount on ni_vp. May overwrite ni_dvp, but doesn't interpret incoming value.
1121 */
1122 static int
lookup_handle_emptyname(struct nameidata * ndp,struct componentname * cnp,int wantparent)1123 lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent)
1124 {
1125 vnode_t dp;
1126 int error = 0;
1127
1128 dp = ndp->ni_vp;
1129 cnp->cn_namelen = 0;
1130 /*
1131 * A degenerate name (e.g. / or "") which is a way of
1132 * talking about a directory, e.g. like "/." or ".".
1133 */
1134 if (dp->v_type != VDIR) {
1135 error = ENOTDIR;
1136 goto out;
1137 }
1138 if (cnp->cn_nameiop == CREATE && dp == rootvnode) {
1139 error = EEXIST;
1140 goto out;
1141 }
1142 if (cnp->cn_nameiop != LOOKUP) {
1143 error = EISDIR;
1144 goto out;
1145 }
1146 if (wantparent) {
1147 /*
1148 * note that we already hold a reference
1149 * on dp, but for some reason can't
1150 * get another one... in this case we
1151 * need to do vnode_put on dp in 'bad'
1152 */
1153 if ((vnode_get(dp))) {
1154 error = ENOENT;
1155 goto out;
1156 }
1157 ndp->ni_dvp = dp;
1158 }
1159 cnp->cn_flags &= ~ISDOTDOT;
1160 cnp->cn_flags |= ISLASTCN;
1161 ndp->ni_next = cnp->cn_nameptr;
1162 ndp->ni_vp = dp;
1163
1164 if (cnp->cn_flags & AUDITVNPATH1) {
1165 AUDIT_ARG(vnpath, dp, ARG_VNODE1);
1166 } else if (cnp->cn_flags & AUDITVNPATH2) {
1167 AUDIT_ARG(vnpath, dp, ARG_VNODE2);
1168 }
1169 if (cnp->cn_flags & SAVESTART) {
1170 panic("lookup: SAVESTART");
1171 }
1172
1173 return 0;
1174 out:
1175 return error;
1176 }
/*
 * Search a pathname.
 * This is a very central and rather complicated routine.
 *
 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
 * The starting directory is taken from ni_startdir. The pathname is
 * descended until done, or a symbolic link is encountered. The variable
 * ni_more is clear if the path is completed; it is set to one if a
 * symbolic link needing interpretation is encountered.
 *
 * NOTE(review): "ni_ptr" and "ni_more" are not referenced by the code in
 * this function; the component pointer actually read here is
 * cnp->cn_nameptr.  These names look historical -- confirm before relying
 * on them.
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
 * returned unlocked. Otherwise the parent directory is not returned. If
 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
 * the target is returned locked, otherwise it is returned unlocked.
 * When creating or renaming and LOCKPARENT is specified, the target may not
 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
 *
 * Overall outline of lookup:
 *
 * dirloop:
 *	identify next component of name at ndp->ni_ptr
 *	handle degenerate case where name is null string
 *	if .. and crossing mount points and on mounted filesys, find parent
 *	call VNOP_LOOKUP routine for next component name
 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
 *	    component vnode returned in ni_vp (if it exists), locked.
 *	if result vnode is mounted on and crossing mount points,
 *	    find mounted on vnode
 *	if more components of name, do next level at dirloop
 *	return the answer in ni_vp, locked if LOCKLEAF set
 *	    if LOCKPARENT set, return locked parent in ni_dvp
 *	    if WANTPARENT set, return unlocked parent in ni_dvp
 *
 * Returns:	0			Success
 *		ENOENT			No such file or directory
 *		EBADF			Bad file descriptor
 *		ENOTDIR			Not a directory
 *		EROFS			Read-only file system [CREATE]
 *		EISDIR			Is a directory [CREATE]
 *		EACCES			".." from the starting directory with
 *					NAMEI_RESOLVE_BENEATH set
 *		cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
 *		vnode_authorize:EROFS
 *		vnode_authorize:EACCES
 *		vnode_authorize:EPERM
 *		vnode_authorize:???
 *		VNOP_LOOKUP:ENOENT	No such file or directory
 *		VNOP_LOOKUP:EJUSTRETURN	Restart system call (INTERNAL)
 *		VNOP_LOOKUP:???
 *		VFS_ROOT:ENOTSUP
 *		VFS_ROOT:ENOENT
 *		VFS_ROOT:???
 */
int
lookup(struct nameidata *ndp)
{
	char    *cp;            /* pointer into pathname argument */
	vnode_t         tdp;            /* saved dp */
	vnode_t         dp;             /* the directory we are searching */
	int docache = 1;        /* == 0 do not cache last component */
	int wantparent;         /* 1 => wantparent or lockparent flag */
	int rdonly;             /* lookup read-only flag bit */
	int dp_authorized = 0;  /* passed to lookup_authorize_search(); reset whenever dp is replaced */
	int error = 0;
	struct componentname *cnp = &ndp->ni_cnd;
	vfs_context_t ctx = cnp->cn_context;
	int vbusyflags = 0;     /* flags handed to vfs_busy() during mount traversal */
	int nc_generation = 0;  /* directory's v_nc_generation sampled before the lookup */
	vnode_t last_dp = NULLVP; /* previous component's vnode, handed to cache_lookup_path() */
	int keep_going;         /* set by lookup_handle_found_vnode(): more work for this path */
	int atroot;             /* ".." was resolved while pinned at the root */

	/*
	 * Setup: break out flag bits into variables.
	 */
	if (cnp->cn_flags & NOCACHE) {
		docache = 0;
	}
	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	cnp->cn_consume = 0;

	/* Take over the starting directory from the nameidata. */
	dp = ndp->ni_startdir;
	ndp->ni_startdir = NULLVP;

	if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) {
		vbusyflags = LK_NOWAIT;
	}
	cp = cnp->cn_nameptr;

	/*
	 * Degenerate (empty) name: it refers to the starting directory
	 * itself, so take an iocount on it and finish without entering
	 * the component loop.
	 */
	if (*cp == '\0') {
		if ((vnode_getwithref(dp))) {
			/* No iocount was obtained, so 'bad' must not put dp. */
			dp = NULLVP;
			error = ENOENT;
			goto bad;
		}
		ndp->ni_vp = dp;
		error = lookup_handle_emptyname(ndp, cnp, wantparent);
		if (error) {
			goto bad;
		}

		return 0;
	}
dirloop:
	atroot = 0;
	ndp->ni_vp = NULLVP;

	if ((error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp))) {
		/*
		 * dp is cleared so that 'bad' does not vnode_put() it.
		 * NOTE(review): presumably cache_lookup_path() has already
		 * dealt with the iocount on failure -- confirm.
		 */
		dp = NULLVP;
		goto bad;
	}
	/*
	 * Intermediate components always request a name cache entry;
	 * the last component only does so when caching is allowed.
	 */
	if ((cnp->cn_flags & ISLASTCN)) {
		if (docache) {
			cnp->cn_flags |= MAKEENTRY;
		}
	} else {
		cnp->cn_flags |= MAKEENTRY;
	}

	dp = ndp->ni_dvp;

	if (ndp->ni_vp != NULLVP) {
		/*
		 * cache_lookup_path returned a non-NULL ni_vp then,
		 * we're guaranteed that the dp is a VDIR, it's
		 * been authorized, and vp is not ".."
		 *
		 * make sure we don't try to enter the name back into
		 * the cache if this vp is purged before we get to that
		 * check since we won't have serialized behind whatever
		 * activity is occurring in the FS that caused the purge
		 */
		if (dp != NULLVP) {
			nc_generation = dp->v_nc_generation - 1;
		}

		goto returned_from_lookup_path;
	}

	/*
	 * Handle "..": three special cases.
	 * 1. if at starting directory (e.g. the cwd/usedvp)
	 *    and RESOLVE_BENEATH, then return EACCES.
	 * 2. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 3. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other file system.
	 */
	if ((cnp->cn_flags & ISDOTDOT)) {
		/* if dp is the starting directory and RESOLVE_BENEATH, we should return EACCES */
		if ((ndp->ni_flag & NAMEI_RESOLVE_BENEATH) && (dp == ndp->ni_usedvp)) {
			error = EACCES;
			goto bad;
		}
		/*
		 * if this is a chroot'ed process, check if the current
		 * directory is still a subdirectory of the process's
		 * root directory.
		 */
		if (ndp->ni_rootdir && (ndp->ni_rootdir != rootvnode) &&
		    dp != ndp->ni_rootdir) {
			int sdir_error;
			int is_subdir = FALSE;

			sdir_error = vnode_issubdir(dp, ndp->ni_rootdir,
			    &is_subdir, vfs_context_kernel());

			/*
			 * If we couldn't determine if dp is a subdirectory of
			 * ndp->ni_rootdir (sdir_error != 0), we let the request
			 * proceed.
			 */
			if (!sdir_error && !is_subdir) {
				/* Escaped the chroot: restart from the process root. */
				vnode_put(dp);
				dp = ndp->ni_rootdir;
				/*
				 * There's a ref on the process's root directory
				 * but we can't use vnode_getwithref here as
				 * there is nothing preventing that ref being
				 * released by another thread.
				 */
				if (vnode_get(dp)) {
					dp = NULLVP;
					error = ENOENT;
					goto bad;
				}
			}
		}

		/* Walk up across mount points until ".." can be taken. */
		for (;;) {
			if (dp == ndp->ni_rootdir || dp == rootvnode) {
				ndp->ni_dvp = dp;
				ndp->ni_vp  = dp;
				/*
				 * we're pinned at the root
				 * we've already got one reference on 'dp'
				 * courtesy of cache_lookup_path... take
				 * another one for the ".."
				 * if we fail to get the new reference, we'll
				 * drop our original down in 'bad'
				 */
				if (vnode_get(dp)) {
					error = ENOENT;
					goto bad;
				}
				atroot = 1;
				goto returned_from_lookup_path;
			}
			if ((dp->v_flag & VROOT) == 0 ||
			    (cnp->cn_flags & NOCROSSMOUNT)) {
				break;
			}
			if (dp->v_mount == NULL) {      /* forced umount */
				error = EBADF;
				goto bad;
			}
			/* Swap the filesystem root for the vnode it is mounted on. */
			tdp = dp;
			dp = tdp->v_mount->mnt_vnodecovered;

			if ((vnode_getwithref(dp))) {
				vnode_put(tdp);
				dp = NULLVP;
				error = ENOENT;
				goto bad;
			}

			vnode_put(tdp);

			ndp->ni_dvp = dp;
			/* A different directory: it has not been authorized yet. */
			dp_authorized = 0;
		}
	}

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
#if CONFIG_UNION_MOUNTS
unionlookup:
#endif /* CONFIG_UNION_MOUNTS */
	ndp->ni_vp = NULLVP;

	if (dp->v_type != VDIR) {
		error = ENOTDIR;
		goto lookup_error;
	}
	if ((cnp->cn_flags & DONOTAUTH) != DONOTAUTH) {
		error = lookup_authorize_search(dp, cnp, dp_authorized, ctx);
		if (error) {
			goto lookup_error;
		}
	}

	/*
	 * Now that we've authorized a lookup, can bail out if the filesystem
	 * will be doing a batched operation.  Return an iocount on dvp.
	 */
#if NAMEDRSRCFORK
	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) {
#else
	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) {
#endif /* NAMEDRSRCFORK */
		/* Record the generation sampled here for whoever finishes the lookup. */
		ndp->ni_flag |= NAMEI_UNFINISHED;
		ndp->ni_ncgeneration = dp->v_nc_generation;
		return 0;
	}

	nc_generation = dp->v_nc_generation;

	/*
	 * Note:
	 * Filesystems that support hardlinks may want to call vnode_update_identity
	 * if the lookup operation below will modify the in-core vnode to belong to a new point
	 * in the namespace.  VFS cannot infer whether or not the look up operation makes the vnode
	 * name change or change parents.  Without this, the lookup may make update
	 * filesystem-specific in-core metadata but fail to update the v_parent or v_name
	 * fields in the vnode.  If VFS were to do this, it would be necessary to call
	 * vnode_update_identity on every lookup operation -- expensive!
	 *
	 * However, even with this in place, multiple lookups may occur in between this lookup
	 * and the subsequent vnop, so, at best, we could only guarantee that you would get a
	 * valid path back, and not necessarily the one that you wanted.
	 *
	 * Example:
	 * /tmp/a == /foo/b
	 *
	 * If you are now looking up /foo/b and the vnode for this link represents /tmp/a,
	 * vnode_update_identity will fix the parentage so that you can get /foo/b back
	 * through the v_parent chain (preventing you from getting /tmp/b back). It would
	 * not fix whether or not you should or should not get /tmp/a vs. /foo/b.
	 */

	error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx);

	if (error) {
		/*
		 * Also entered directly (via goto) from the ENOTDIR and
		 * authorization failures above.
		 */
lookup_error:
#if CONFIG_UNION_MOUNTS
		/* Not found in a union mount: retry in the underlying mount. */
		if ((error == ENOENT) &&
		    (dp->v_mount != NULL) &&
		    (dp->v_mount->mnt_flag & MNT_UNION)) {
			tdp = dp;
			error = lookup_traverse_union(tdp, &dp, ctx);
			vnode_put(tdp);
			if (error) {
				dp = NULLVP;
				goto bad;
			}

			ndp->ni_dvp = dp;
			dp_authorized = 0;
			goto unionlookup;
		}
#endif /* CONFIG_UNION_MOUNTS */

		/* Only EJUSTRETURN (entry absent, creation possible) continues. */
		if (error != EJUSTRETURN) {
			goto bad;
		}

		if (ndp->ni_vp != NULLVP) {
			panic("leaf should be empty");
		}

#if NAMEDRSRCFORK
		/*
		 * At this point, error should be EJUSTRETURN.
		 *
		 * If CN_WANTSRSRCFORK is set, that implies that the
		 * underlying filesystem could not find the "parent" of the
		 * resource fork (the data fork), and we are doing a lookup
		 * for a CREATE event.
		 *
		 * However, this should be converted to an error, as the
		 * failure to find this parent should disallow further
		 * progress to try and acquire a resource fork vnode.
		 */
		if (cnp->cn_flags & CN_WANTSRSRCFORK) {
			error = ENOENT;
			goto bad;
		}
#endif

		error = lookup_validate_creation_path(ndp);
		if (error) {
			goto bad;
		}
		/*
		 * We return with ni_vp NULL to indicate that the entry
		 * doesn't currently exist, leaving a pointer to the
		 * referenced directory vnode in ndp->ni_dvp.
		 */
		if (cnp->cn_flags & SAVESTART) {
			if ((vnode_get(ndp->ni_dvp))) {
				error = ENOENT;
				goto bad;
			}
			ndp->ni_startdir = ndp->ni_dvp;
		}
		/* Note: ni_dvp is released here but the field is not cleared. */
		if (!wantparent) {
			vnode_put(ndp->ni_dvp);
		}

		if (kdebug_enable) {
			kdebug_lookup(ndp->ni_dvp, cnp);
		}
		return 0;
	}
returned_from_lookup_path:
	/* We'll always have an iocount on ni_vp when this finishes. */
	error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx);
	if (error != 0) {
		goto bad2;
	}

	if (keep_going) {
		dp = ndp->ni_vp;

		/* namei() will handle symlinks */
		if ((dp->v_type == VLNK) &&
		    ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
			return 0;
		}

		/*
		 * Otherwise, there's more path to process.
		 * cache_lookup_path is now responsible for dropping io ref on dp
		 * when it is called again in the dirloop.  This ensures we hold
		 * a ref on dp until we complete the next round of lookup.
		 */
		last_dp = dp;

		goto dirloop;
	}

	return 0;
	/*
	 * bad2: failure after a vnode was found -- drop the iocounts on both
	 * ni_dvp (if still set) and ni_vp.
	 */
bad2:
	if (ndp->ni_dvp) {
		vnode_put(ndp->ni_dvp);
	}

	vnode_put(ndp->ni_vp);
	ndp->ni_vp = NULLVP;

	if (kdebug_enable) {
		kdebug_lookup(dp, cnp);
	}
	return error;

	/*
	 * bad: failure before a vnode was found -- drop the iocount on the
	 * directory being searched, unless dp was cleared by the failing path.
	 */
bad:
	if (dp) {
		vnode_put(dp);
	}
	ndp->ni_vp = NULLVP;

	if (kdebug_enable) {
		kdebug_lookup(dp, cnp);
	}
	return error;
}
1602
1603 #if CONFIG_UNION_MOUNTS
1604 /*
1605 * Given a vnode in a union mount, traverse to the equivalent
1606 * vnode in the underlying mount.
1607 */
1608 int
1609 lookup_traverse_union(vnode_t dvp, vnode_t *new_dvp, vfs_context_t ctx)
1610 {
1611 char *path = NULL, *pp;
1612 const char *name, *np;
1613 size_t len;
1614 int error = 0;
1615 struct nameidata nd;
1616 vnode_t vp = dvp;
1617
1618 *new_dvp = NULL;
1619
1620 if (vp && vp->v_flag & VROOT) {
1621 *new_dvp = vp->v_mount->mnt_vnodecovered;
1622 if (vnode_getwithref(*new_dvp)) {
1623 return ENOENT;
1624 }
1625 return 0;
1626 }
1627
1628 path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
1629
1630 /*
1631 * Walk back up to the mountpoint following the
1632 * v_parent chain and build a slash-separated path.
1633 * Then lookup that path starting with the covered vnode.
1634 */
1635 pp = path + (MAXPATHLEN - 1);
1636 *pp = '\0';
1637
1638 while (1) {
1639 name = vnode_getname(vp);
1640 if (name == NULL) {
1641 printf("lookup_traverse_union: null parent name: .%s\n", pp);
1642 error = ENOENT;
1643 goto done;
1644 }
1645 len = strlen(name);
1646 if ((len + 1) > (size_t)(pp - path)) { // Enough space for this name ?
1647 error = ENAMETOOLONG;
1648 vnode_putname(name);
1649 goto done;
1650 }
1651 for (np = name + len; len > 0; len--) { // Copy name backwards
1652 *--pp = *--np;
1653 }
1654 vnode_putname(name);
1655 vp = vp->v_parent;
1656 if (vp == NULLVP || vp->v_flag & VROOT) {
1657 break;
1658 }
1659 *--pp = '/';
1660 }
1661
1662 /* Evaluate the path in the underlying mount */
1663 NDINIT(&nd, LOOKUP, OP_LOOKUP, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(pp), ctx);
1664 nd.ni_dvp = dvp->v_mount->mnt_vnodecovered;
1665 error = namei(&nd);
1666 if (error == 0) {
1667 *new_dvp = nd.ni_vp;
1668 }
1669 nameidone(&nd);
1670 done:
1671 if (path) {
1672 zfree(ZV_NAMEI, path);
1673 }
1674 return error;
1675 }
1676 #endif /* CONFIG_UNION_MOUNTS */
1677
1678 int
1679 lookup_validate_creation_path(struct nameidata *ndp)
1680 {
1681 struct componentname *cnp = &ndp->ni_cnd;
1682
1683 /*
1684 * If creating and at end of pathname, then can consider
1685 * allowing file to be created.
1686 */
1687 if (cnp->cn_flags & RDONLY) {
1688 return EROFS;
1689 }
1690 if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) {
1691 return ENOENT;
1692 }
1693
1694 return 0;
1695 }
1696
/*
 * lookup_traverse_mountpoints
 *
 * If dp is a directory with a filesystem mounted on it, descend to the
 * root vnode of that filesystem, repeating for stacked mounts.  Does
 * nothing when dp is not a directory or NOCROSSMOUNT is set.
 *
 * Parameters:
 *	ndp		nameidata; ni_vp is updated to each root crossed into.
 *	cnp		componentname; only cn_flags (NOCROSSMOUNT) is read.
 *	dp		the vnode just found (caller passes ndp->ni_vp).
 *	vbusyflags	flags for vfs_busy(); with LK_NOWAIT a busy mount
 *			yields ENOENT instead of a retry.
 *	ctx		VFS context for VFS_ROOT() and trigger resolution.
 *
 * Returns:	0 on success, ENOENT (mount busy with LK_NOWAIT), or the
 *		error from VFS_ROOT() / vnode_trigger_resolve().
 *
 * Modifies only ni_vp.  Always returns with ni_vp still valid (iocount held).
 */
static int
lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
    int vbusyflags, vfs_context_t ctx)
{
	mount_t mp;
	vnode_t tdp;
	int error = 0;
	uint32_t depth = 0;                     /* number of mount points crossed */
	vnode_t mounted_on_dp;                  /* the vnode we started from */
	int current_mount_generation = 0;       /* mount_generation sampled before traversing */
#if CONFIG_TRIGGERS
	vnode_t triggered_dp = NULLVP;          /* last vnode a trigger was resolved on */
	int retry_cnt = 0;                      /* consecutive trigger resolves on the same vnode */
#define MAX_TRIGGER_RETRIES 1
#endif

	if (dp->v_type != VDIR || cnp->cn_flags & NOCROSSMOUNT) {
		return 0;
	}

	mounted_on_dp = dp;
#if CONFIG_TRIGGERS
restart:
#endif
	current_mount_generation = mount_generation;

	while (dp->v_mountedhere) {
		/*
		 * Re-read v_mountedhere under the vnode lock and bump
		 * mnt_crossref before dropping the lock; the count is
		 * released again with mount_dropcrossref() below.
		 */
		vnode_lock_spin(dp);
		if ((mp = dp->v_mountedhere)) {
			mp->mnt_crossref++;
			vnode_unlock(dp);
		} else {
			/* The mount went away between the check and the lock. */
			vnode_unlock(dp);
			break;
		}

		if (ISSET(mp->mnt_lflag, MNT_LFORCE)) {
			mount_dropcrossref(mp, dp, 0);
			break;  // don't traverse into a forced unmount
		}


		if (vfs_busy(mp, vbusyflags)) {
			mount_dropcrossref(mp, dp, 0);
			if (vbusyflags == LK_NOWAIT) {
				error = ENOENT;
				goto out;
			}

			/* Re-evaluate dp->v_mountedhere and try again. */
			continue;
		}

		error = VFS_ROOT(mp, &tdp, ctx);

		mount_dropcrossref(mp, dp, 0);
		vfs_unbusy(mp);

		if (error) {
			goto out;
		}

		/* Swap the covered vnode for the root returned by VFS_ROOT(). */
		vnode_put(dp);
		ndp->ni_vp = dp = tdp;
		if (dp->v_type != VDIR) {
#if DEVELOPMENT || DEBUG
			panic("%s : Root of filesystem not a directory",
			    __FUNCTION__);
#else
			break;
#endif
		}
		depth++;
	}

#if CONFIG_TRIGGERS
	/*
	 * The triggered_dp check here is required but is susceptible to a
	 * (unlikely) race in which trigger mount is done from here and is
	 * unmounted before we get past vfs_busy above. We retry to deal with
	 * that case but it has the side effect of unwanted retries for
	 * "special" processes which don't want to trigger mounts.
	 */
	if (dp->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) {
		error = vnode_trigger_resolve(dp, ndp, ctx);
		if (error) {
			goto out;
		}
		/* Count only back-to-back resolves of the same vnode. */
		if (dp == triggered_dp) {
			retry_cnt += 1;
		} else {
			retry_cnt = 0;
		}
		triggered_dp = dp;
		goto restart;
	}
#endif /* CONFIG_TRIGGERS */

	/*
	 * At least one mount point was crossed: record the final root vnode
	 * (and its v_id, plus the mount generation sampled above) on the
	 * mount currently attached to the vnode we started from.
	 */
	if (depth) {
		mp = mounted_on_dp->v_mountedhere;

		if (mp) {
			mount_lock_spin(mp);
			mp->mnt_realrootvp_vid = dp->v_id;
			mp->mnt_realrootvp = dp;
			mp->mnt_generation = current_mount_generation;
			mount_unlock(mp);
		}
	}

	return 0;

out:
	return error;
}
1814
1815 /*
1816 * Takes ni_vp and ni_dvp non-NULL. Returns with *new_dp set to the location
1817 * at which to start a lookup with a resolved path, and all other iocounts dropped.
1818 */
1819 static int
1820 lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, bool *new_dp_has_iocount, vfs_context_t ctx)
1821 {
1822 int error;
1823 char *cp = NULL; /* pointer into pathname argument */
1824 u_int cplen = 0;
1825 uio_t auio;
1826 UIO_STACKBUF(uio_buf, 1);
1827 int need_newpathbuf;
1828 u_int linklen = 0;
1829 struct componentname *cnp = &ndp->ni_cnd;
1830 vnode_t dp;
1831 char *tmppn;
1832 u_int rsrclen = (cnp->cn_flags & CN_WANTSRSRCFORK) ? sizeof(_PATH_RSRCFORKSPEC) : 0;
1833 bool dp_has_iocount = false;
1834
1835 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
1836 return ELOOP;
1837 }
1838 #if CONFIG_MACF
1839 if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0) {
1840 return error;
1841 }
1842 #endif /* MAC */
1843 if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) {
1844 need_newpathbuf = 1;
1845 } else {
1846 need_newpathbuf = 0;
1847 }
1848
1849 if (need_newpathbuf) {
1850 if (!(cnp->cn_flags & HASBUF) || cnp->cn_pnlen == MAXPATHLEN) {
1851 cp = zalloc(ZV_NAMEI);
1852 cplen = MAXPATHLEN;
1853 } else {
1854 assert(proc_support_long_paths(vfs_context_proc(ctx)));
1855 cp = kalloc_data(cnp->cn_pnlen, Z_WAITOK | Z_ZERO);
1856 cplen = cnp->cn_pnlen;
1857 }
1858 } else {
1859 cp = cnp->cn_pnbuf;
1860 }
1861 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
1862
1863 uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);
1864
1865 error = VNOP_READLINK(ndp->ni_vp, auio, ctx);
1866
1867 if (!error) {
1868 user_ssize_t resid = uio_resid(auio);
1869
1870 assert(resid <= MAXPATHLEN);
1871
1872 if (resid == MAXPATHLEN) {
1873 linklen = 0;
1874 } else {
1875 /*
1876 * Safe to set unsigned with a [larger] signed type here
1877 * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN
1878 * is only 1024.
1879 */
1880 linklen = (u_int)strnlen(cp, MAXPATHLEN - (u_int)resid);
1881 }
1882
1883 size_t maxlen = proc_support_long_paths(vfs_context_proc(ctx)) ? MAXLONGPATHLEN : MAXPATHLEN;
1884
1885 if (linklen == 0) {
1886 error = ENOENT;
1887 } else if (linklen + ndp->ni_pathlen + rsrclen > maxlen) {
1888 error = ENAMETOOLONG;
1889 }
1890 }
1891
1892 if (error) {
1893 if (need_newpathbuf) {
1894 if (cplen == MAXPATHLEN) {
1895 zfree(ZV_NAMEI, cp);
1896 } else {
1897 kfree_data(cp, cplen);
1898 }
1899 }
1900 return error;
1901 }
1902
1903 if (need_newpathbuf) {
1904 tmppn = cnp->cn_pnbuf;
1905 u_int tmplen = cnp->cn_pnlen;
1906 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
1907 cnp->cn_pnbuf = cp;
1908 cnp->cn_pnlen = cplen;
1909
1910 if ((cnp->cn_flags & HASBUF)) {
1911 if (tmplen == MAXPATHLEN) {
1912 zfree(ZV_NAMEI, tmppn);
1913 } else {
1914 kfree_data(tmppn, tmplen);
1915 }
1916 } else {
1917 cnp->cn_flags |= HASBUF;
1918 }
1919 } else {
1920 cnp->cn_pnbuf[linklen] = '\0';
1921 }
1922
1923 ndp->ni_pathlen += linklen;
1924 cnp->cn_nameptr = cnp->cn_pnbuf;
1925
1926 /*
1927 * starting point for 'relative'
1928 * symbolic link path
1929 */
1930 dp = ndp->ni_dvp;
1931
1932 /*
1933 * get rid of reference returned via 'lookup'
1934 * ni_dvp is released only if we restart at /.
1935 */
1936 vnode_put(ndp->ni_vp);
1937 ndp->ni_vp = NULLVP;
1938 ndp->ni_dvp = NULLVP;
1939
1940 dp_has_iocount = true;
1941
1942 /*
1943 * Check if symbolic link restarts us at the root
1944 */
1945 if (*(cnp->cn_nameptr) == '/') {
1946 /* return EACCES if resolve beneath and the symlink restarts at root */
1947 if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
1948 vnode_put(dp); /* ALWAYS have a dvp for a symlink */
1949 return EACCES;
1950 }
1951 while (*(cnp->cn_nameptr) == '/') {
1952 cnp->cn_nameptr++;
1953 ndp->ni_pathlen--;
1954 }
1955 if (linklen != 0) {
1956 vnode_put(dp); /* ALWAYS have a dvp for a symlink */
1957 dp_has_iocount = false;
1958 if ((dp = ndp->ni_rootdir) == NULLVP) {
1959 return ENOENT;
1960 }
1961 }
1962 }
1963
1964 *new_dp = dp;
1965 *new_dp_has_iocount = dp_has_iocount;
1966
1967 return 0;
1968 }
1969
1970 /*
1971 * relookup - lookup a path name component
1972 * Used by lookup to re-aquire things.
1973 */
1974 int
1975 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1976 {
1977 struct vnode *dp = NULL; /* the directory we are searching */
1978 int wantparent; /* 1 => wantparent or lockparent flag */
1979 int rdonly; /* lookup read-only flag bit */
1980 int error = 0;
1981 #ifdef NAMEI_DIAGNOSTIC
1982 int i, newhash; /* DEBUG: check name hash */
1983 char *cp; /* DEBUG: check name ptr/len */
1984 #endif
1985 vfs_context_t ctx = cnp->cn_context;
1986
1987 /*
1988 * Setup: break out flag bits into variables.
1989 */
1990 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
1991 rdonly = cnp->cn_flags & RDONLY;
1992 cnp->cn_flags &= ~ISSYMLINK;
1993
1994 if (cnp->cn_flags & NOCACHE) {
1995 cnp->cn_flags &= ~MAKEENTRY;
1996 } else {
1997 cnp->cn_flags |= MAKEENTRY;
1998 }
1999
2000 dp = dvp;
2001
2002 /*
2003 * Check for degenerate name (e.g. / or "")
2004 * which is a way of talking about a directory,
2005 * e.g. like "/." or ".".
2006 */
2007 if (cnp->cn_nameptr[0] == '\0') {
2008 if (cnp->cn_nameiop != LOOKUP || wantparent) {
2009 error = EISDIR;
2010 goto bad;
2011 }
2012 if (dp->v_type != VDIR) {
2013 error = ENOTDIR;
2014 goto bad;
2015 }
2016 if ((vnode_get(dp))) {
2017 error = ENOENT;
2018 goto bad;
2019 }
2020 *vpp = dp;
2021
2022 if (cnp->cn_flags & SAVESTART) {
2023 panic("lookup: SAVESTART");
2024 }
2025 return 0;
2026 }
2027 /*
2028 * We now have a segment name to search for, and a directory to search.
2029 */
2030 if ((error = VNOP_LOOKUP(dp, vpp, cnp, ctx))) {
2031 if (error != EJUSTRETURN) {
2032 goto bad;
2033 }
2034 #if DIAGNOSTIC
2035 if (*vpp != NULL) {
2036 panic("leaf should be empty");
2037 }
2038 #endif
2039 /*
2040 * If creating and at end of pathname, then can consider
2041 * allowing file to be created.
2042 */
2043 if (rdonly) {
2044 error = EROFS;
2045 goto bad;
2046 }
2047 /*
2048 * We return with ni_vp NULL to indicate that the entry
2049 * doesn't currently exist, leaving a pointer to the
2050 * (possibly locked) directory inode in ndp->ni_dvp.
2051 */
2052 return 0;
2053 }
2054 dp = *vpp;
2055
2056 #if DIAGNOSTIC
2057 /*
2058 * Check for symbolic link
2059 */
2060 if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW)) {
2061 panic("relookup: symlink found.");
2062 }
2063 #endif
2064
2065 /*
2066 * Disallow directory write attempts on read-only file systems.
2067 */
2068 if (rdonly &&
2069 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
2070 error = EROFS;
2071 goto bad2;
2072 }
2073 /* ASSERT(dvp == ndp->ni_startdir) */
2074
2075 return 0;
2076
2077 bad2:
2078 vnode_put(dp);
2079 bad:
2080 *vpp = NULL;
2081
2082 return error;
2083 }
2084
2085 /*
2086 * Free pathname buffer
2087 */
2088 void
2089 nameidone(struct nameidata *ndp)
2090 {
2091 if (ndp->ni_cnd.cn_flags & HASBUF) {
2092 char *tmp = ndp->ni_cnd.cn_pnbuf;
2093
2094 ndp->ni_cnd.cn_pnbuf = NULL;
2095 ndp->ni_cnd.cn_flags &= ~HASBUF;
2096 if (ndp->ni_cnd.cn_pnlen == MAXPATHLEN) {
2097 zfree(ZV_NAMEI, tmp);
2098 } else {
2099 kfree_data(tmp, ndp->ni_cnd.cn_pnlen);
2100 }
2101 }
2102 }
2103
2104
2105 /*
2106 * Log (part of) a pathname using kdebug, as used by fs_usage. The path up to
2107 * and including the current component name are logged. Up to NUMPARMS * 4
2108 * bytes of pathname will be logged. If the path to be logged is longer than
2109 * that, then the last NUMPARMS * 4 bytes are logged. That is, the truncation
2110 * removes the leading portion of the path.
2111 *
2112 * The logging is done via multiple KDBG_RELEASE calls. The first one is marked
2113 * with DBG_FUNC_START. The last one is marked with DBG_FUNC_END (in addition
2114 * to DBG_FUNC_START if it is also the first). There may be intermediate ones
2115 * with neither DBG_FUNC_START nor DBG_FUNC_END.
2116 *
 * The first event passes the vnode pointer and 24 or 32 (on K32, 12 or 16)
 * bytes of pathname. The remaining events add 32 (on K32, 16) bytes of
 * pathname each. The minimum number of events required to pass the path is
 * used. Any excess padding in the final event (because not all of the 24 or 32
2121 * (on K32, 12 or 16) bytes are needed for the remainder of the path) is set to
2122 * zero bytes, or '>' if there is more path beyond the current component name
2123 * (usually because an intermediate component was not found).
2124 *
2125 * NOTE: If the path length is greater than NUMPARMS * 4, or is not of the form
2126 * 24 + N * 32 (or on K32, 12 + N * 16), there will be no padding.
2127 */
2128 #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
2129
2130 void
2131 kdebug_vfs_lookup(const char *path, size_t path_len, void *vnp,
2132 uint32_t flags)
2133 {
2134 unsigned long path_words[4] = {};
2135 size_t trace_len = MIN(sizeof(path_words) - sizeof(path_words[0]), path_len);
2136 size_t path_next = 0;
2137 bool noprocfilt = flags & KDBG_VFS_LOOKUP_FLAG_NOPROCFILT;
2138
2139 assert(path_len >= 0);
2140
2141 int code = ((flags & KDBG_VFS_LOOKUP_FLAG_LOOKUP) ? VFS_LOOKUP :
2142 VFS_LOOKUP_DONE) | DBG_FUNC_START;
2143
2144 if (path_len <= (3 * (int)sizeof(long))) {
2145 code |= DBG_FUNC_END;
2146 }
2147 memcpy(path_words, path, trace_len);
2148 path_next += trace_len;
2149
2150 if (noprocfilt) {
2151 KDBG_RELEASE_NOPROCFILT(code, kdebug_vnode(vnp), path_words[0],
2152 path_words[1], path_words[2]);
2153 } else {
2154 KDBG_RELEASE(code, kdebug_vnode(vnp), path_words[0], path_words[1],
2155 path_words[2]);
2156 }
2157
2158 code &= ~DBG_FUNC_START;
2159
2160 for (int i = 3; i * (int)sizeof(long) < path_len; i += 4) {
2161 trace_len = sizeof(path_words);
2162 if ((i + 4) * (int)sizeof(long) >= path_len) {
2163 code |= DBG_FUNC_END;
2164 trace_len = path_len - path_next;
2165 memset(path_words, 0, sizeof(path_words));
2166 }
2167 memcpy(path_words, &path[path_next], trace_len);
2168 path_next += trace_len;
2169
2170 if (noprocfilt) {
2171 KDBG_RELEASE_NOPROCFILT(code, path_words[0], path_words[1],
2172 path_words[2], path_words[3]);
2173 } else {
2174 KDBG_RELEASE(code, path_words[0], path_words[1],
2175 path_words[2], path_words[3]);
2176 }
2177 }
2178 }
2179
2180 void
2181 kdebug_lookup_gen_events(long *path_words, int path_len, void *vnp, bool lookup)
2182 {
2183 assert(path_len >= 0);
2184 kdebug_vfs_lookup((const char *)path_words, path_len, vnp,
2185 lookup ? KDBG_VFS_LOOKUP_FLAG_LOOKUP : 0);
2186 }
2187
2188 void
2189 kdebug_lookup(vnode_t vnp, struct componentname *cnp)
2190 {
2191 kdebug_vfs_lookup(cnp->cn_pnbuf, strnlen(cnp->cn_pnbuf, cnp->cn_pnlen), vnp, KDBG_VFS_LOOKUP_FLAG_LOOKUP);
2192 }
2193
2194 #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
2195
2196 void
2197 kdebug_vfs_lookup(const char *dbg_parms __unused, size_t dbg_namelen __unused,
2198 void *dp __unused, __unused kdebug_vfs_lookup_flags_t flags)
2199 {
2200 }
2201
2202 static void
2203 kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused)
2204 {
2205 }
2206 #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
2207
2208 int
2209 vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx)
2210 {
2211 mount_t mp;
2212 int error;
2213
2214 mp = mount_lookupby_volfsid(fsid->val[0], 1);
2215 if (mp == NULL) {
2216 return EINVAL;
2217 }
2218
2219 /* Get the target vnode. */
2220 if (ino == 2) {
2221 error = VFS_ROOT(mp, vpp, ctx);
2222 } else {
2223 error = VFS_VGET(mp, ino, vpp, ctx);
2224 }
2225
2226 vfs_unbusy(mp);
2227 return error;
2228 }
2229 /*
2230 * Obtain the real path from a legacy volfs style path.
2231 *
2232 * Valid formats of input path:
2233 *
2234 * "555/@"
2235 * "555/2"
2236 * "555/123456"
2237 * "555/123456/foobar"
2238 *
2239 * Where:
2240 * 555 represents the volfs file system id
2241 * '@' and '2' are aliases to the root of a file system
2242 * 123456 represents a file id
2243 * "foobar" represents a file name
2244 */
2245 #if CONFIG_VOLFS
2246 static int
2247 vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx)
2248 {
2249 vnode_t vp;
2250 struct mount *mp = NULL;
2251 char *str;
2252 char ch;
2253 unsigned long id;
2254 ino64_t ino;
2255 int error;
2256 int length;
2257
2258 /* Get file system id and move str to next component. */
2259 id = strtoul(path, &str, 10);
2260 if (id == 0 || str[0] != '/') {
2261 return EINVAL;
2262 }
2263 while (*str == '/') {
2264 str++;
2265 }
2266 ch = *str;
2267
2268 if (id > INT_MAX) {
2269 return ENOENT;
2270 }
2271 mp = mount_lookupby_volfsid((int)id, 1);
2272 if (mp == NULL) {
2273 return EINVAL; /* unexpected failure */
2274 }
2275 /* Check for an alias to a file system root. */
2276 if (ch == '@' && str[1] == '\0') {
2277 ino = 2;
2278 str++;
2279 } else {
2280 /* Get file id and move str to next component. */
2281 ino = strtouq(str, &str, 10);
2282 }
2283
2284 /* Get the target vnode. */
2285 if (ino == 2) {
2286 struct vfs_attr vfsattr;
2287 int use_vfs_root = TRUE;
2288
2289 VFSATTR_INIT(&vfsattr);
2290 VFSATTR_WANTED(&vfsattr, f_capabilities);
2291 if (vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
2292 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
2293 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
2294 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
2295 use_vfs_root = FALSE;
2296 }
2297 }
2298
2299 if (use_vfs_root) {
2300 error = VFS_ROOT(mp, &vp, ctx);
2301 } else {
2302 error = VFS_VGET(mp, ino, &vp, ctx);
2303 }
2304 } else {
2305 error = VFS_VGET(mp, ino, &vp, ctx);
2306 }
2307 vfs_unbusy(mp);
2308 if (error) {
2309 goto out;
2310 }
2311 realpath[0] = '\0';
2312
2313 /* Get the absolute path to this vnode. */
2314 error = build_path(vp, realpath, (int)bufsize, &length, 0, ctx);
2315 vnode_put(vp);
2316
2317 if (error == 0 && *str != '\0') {
2318 size_t attempt = strlcat(realpath, str, MAXPATHLEN);
2319 if (attempt > MAXPATHLEN) {
2320 error = ENAMETOOLONG;
2321 }
2322 }
2323 out:
2324 return error;
2325 }
2326 #endif
2327
2328 void
2329 lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create)
2330 {
2331 if (error == 0 && vp == NULLVP) {
2332 panic("NULL vp with error == 0.");
2333 }
2334
2335 /*
2336 * We don't want to do any of this if we didn't use the compound vnop
2337 * to perform the lookup... i.e. if we're allowing and using the legacy pattern,
2338 * where we did a full lookup.
2339 */
2340 if ((ndp->ni_flag & NAMEI_COMPOUND_OP_MASK) == 0) {
2341 return;
2342 }
2343
2344 /*
2345 * If we're going to continue the lookup, we'll handle
2346 * all lookup-related updates at that time.
2347 */
2348 if (error == EKEEPLOOKING) {
2349 return;
2350 }
2351
2352 /*
2353 * Only audit or update cache for *found* vnodes. For creation
2354 * neither would happen in the non-compound-vnop case.
2355 */
2356 if ((vp != NULLVP) && !did_create) {
2357 /*
2358 * If MAKEENTRY isn't set, and we've done a successful compound VNOP,
2359 * then we certainly don't want to update cache or identity.
2360 */
2361 if ((error != 0) || (ndp->ni_cnd.cn_flags & MAKEENTRY)) {
2362 lookup_consider_update_cache(dvp, vp, &ndp->ni_cnd, ndp->ni_ncgeneration);
2363 }
2364 if (ndp->ni_cnd.cn_flags & AUDITVNPATH1) {
2365 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2366 } else if (ndp->ni_cnd.cn_flags & AUDITVNPATH2) {
2367 AUDIT_ARG(vnpath, vp, ARG_VNODE2);
2368 }
2369 }
2370
2371 /*
2372 * If you created (whether you opened or not), cut a lookup tracepoint
2373 * for the parent dir (as would happen without a compound vnop). Note: we may need
2374 * a vnode despite failure in this case!
2375 *
2376 * If you did not create:
2377 * Found child (succeeded or not): cut a tracepoint for the child.
2378 * Did not find child: cut a tracepoint with the parent.
2379 */
2380 if (kdebug_enable) {
2381 kdebug_lookup(vp ? vp : dvp, &ndp->ni_cnd);
2382 }
2383 }
2384