1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/syslimits.h>
78 #include <sys/time.h>
79 #include <sys/namei.h>
80 #include <sys/vm.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/errno.h>
84 #include <kern/kalloc.h>
85 #include <sys/filedesc.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kdebug.h>
88 #include <sys/unistd.h> /* For _PC_NAME_MAX */
89 #include <sys/uio_internal.h>
90 #include <sys/kauth.h>
91 #include <kern/zalloc.h>
92 #include <security/audit/audit.h>
93 #if CONFIG_MACF
94 #include <security/mac_framework.h>
95 #endif
96 #include <os/atomic_private.h>
97
98 #include <sys/paths.h>
99
100 #if NAMEDRSRCFORK
101 #include <sys/xattr.h>
102 #endif
103 /*
104 * The minimum volfs-style pathname is 9.
105 * Example: "/.vol/1/2"
106 */
107 #define VOLFS_MIN_PATH_LEN 9
108
109
110 #if CONFIG_VOLFS
111 static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx, vnode_t rdvp);
112 #define MAX_VOLFS_RESTARTS 5
113 #endif
114
115 static int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
116 static int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, bool* dp_has_iocount, vfs_context_t ctx);
117 static int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
118 static void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
119 static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
120 int vbusyflags, int *keep_going, int nc_generation,
121 int wantparent, int atroot, vfs_context_t ctx);
122 static int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
123
124 #if NAMEDRSRCFORK
125 static int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
126 #endif
127
128 extern lck_rw_t rootvnode_rw_lock;
129
130 #define RESOLVE_CHECKED 0x80000000
131
132 static KALLOC_HEAP_DEFINE(KHEAP_VFS_NAMEI, "vfs_namei", KHEAP_ID_DATA_BUFFERS);
133
134 /* namei allocation/free methods */
135
136 __typed_allocators_ignore_push
137
138 static void *
namei_alloc(size_t size)139 namei_alloc(size_t size)
140 {
141 assert(size <= MAXLONGPATHLEN);
142 return kheap_alloc(KHEAP_VFS_NAMEI, size, Z_WAITOK_ZERO_NOFAIL);
143 }
144
145 static void
namei_free(void * addr,size_t size)146 namei_free(void *addr, size_t size)
147 {
148 assert(size <= MAXLONGPATHLEN);
149 kheap_free(KHEAP_VFS_NAMEI, addr, size);
150 }
151
152 __typed_allocators_ignore_pop
153
154 /*
155 * Convert a pathname into a pointer to a locked inode.
156 *
157 * The FOLLOW flag is set when symbolic links are to be followed
158 * when they occur at the end of the name translation process.
159 * Symbolic links are always followed for all other pathname
160 * components other than the last.
161 *
162 * The segflg defines whether the name is to be copied from user
163 * space or kernel space.
164 *
165 * Overall outline of namei:
166 *
167 * copy in name
168 * get starting directory
169 * while (!done && !error) {
170 * call lookup to search path.
171 * if symbolic link, massage name in buffer and continue
172 * }
173 *
174 * Returns: 0 Success
175 * ENOENT No such file or directory
176 * ELOOP Too many levels of symbolic links
177 * ENAMETOOLONG Filename too long
178 * copyinstr:EFAULT Bad address
179 * copyinstr:ENAMETOOLONG Filename too long
180 * lookup:EBADF Bad file descriptor
181 * lookup:EROFS
182 * lookup:EACCES
183 * lookup:EPERM
184 * lookup:ERECYCLE vnode was recycled from underneath us in lookup.
185 * This means we should re-drive lookup from this point.
186 * lookup: ???
187 * VNOP_READLINK:???
188 */
189 int
namei(struct nameidata * ndp)190 namei(struct nameidata *ndp)
191 {
192 struct vnode *dp; /* the directory we are searching */
193 struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to
194 * heavy vnode pressure */
195 uint32_t cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
196 int error;
197 struct componentname *cnp = &ndp->ni_cnd;
198 vfs_context_t ctx = cnp->cn_context;
199 proc_t p = vfs_context_proc(ctx);
200 #if CONFIG_AUDIT
201 /* XXX ut should be from context */
202 uthread_t ut = current_uthread();
203 #endif
204
205 #if CONFIG_VOLFS
206 int volfs_restarts = 0;
207 #endif
208 size_t bytes_copied = 0;
209 size_t resolve_prefix_len;
210 vnode_t rootdir_with_usecount = NULLVP;
211 vnode_t startdir_with_usecount = NULLVP;
212 vnode_t usedvp_dp = NULLVP;
213 int32_t old_count = 0;
214 uint32_t resolve_flags;
215 int resolve_error = 0;
216 bool dp_has_iocount = false;
217 bool clear_usedvp = false;
218
219 #if DIAGNOSTIC
220 if (!vfs_context_ucred(ctx) || !p) {
221 panic("namei: bad cred/proc");
222 }
223 if (cnp->cn_nameiop & (~OPMASK)) {
224 panic("namei: nameiop contaminated with flags");
225 }
226 if (cnp->cn_flags & OPMASK) {
227 panic("namei: flags contaminated with nameiops");
228 }
229 #endif
230
231 /*
232 * A compound VNOP found something that needs further processing:
233 * either a trigger vnode, a covered directory, or a symlink.
234 */
235 if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
236 int rdonly, vbusyflags, keep_going, wantparent;
237
238 rdonly = cnp->cn_flags & RDONLY;
239 vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
240 keep_going = 0;
241 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
242
243 ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);
244
245 error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags,
246 &keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
247 if (error) {
248 goto out_drop;
249 }
250 if (keep_going) {
251 if ((cnp->cn_flags & ISSYMLINK) == 0) {
252 panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
253 }
254 goto continue_symlink;
255 }
256
257 return 0;
258 }
259
260 vnode_recycled:
261 /*
262 * Init the resolve states to 0 to ensure that the resolve prefix path got
263 * stripped in case we are retrying lookup due to vnode got recycled.
264 */
265 resolve_flags = 0;
266 resolve_prefix_len = 0;
267
268 /*
269 * Get a buffer for the name to be translated, and copy the
270 * name into the buffer.
271 */
272 if ((cnp->cn_flags & HASBUF) == 0) {
273 cnp->cn_pnbuf = ndp->ni_pathbuf;
274 cnp->cn_pnlen = PATHBUFLEN;
275 }
276
277 retry_copy:
278 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
279 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
280 cnp->cn_pnlen, &bytes_copied);
281 } else {
282 error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
283 cnp->cn_pnlen, &bytes_copied);
284 }
285 if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
286 if (bytes_copied == PATHBUFLEN) {
287 resolve_error = lookup_check_for_resolve_prefix(cnp->cn_pnbuf, PATHBUFLEN,
288 PATHBUFLEN, &resolve_flags, &resolve_prefix_len);
289 /* errors from copyinstr take precedence over resolve_error */
290 if (!resolve_error && resolve_prefix_len) {
291 ndp->ni_dirp += resolve_prefix_len;
292 resolve_prefix_len = 0;
293 }
294 }
295
296 cnp->cn_pnbuf = namei_alloc(MAXPATHLEN);
297 cnp->cn_flags |= HASBUF;
298 cnp->cn_pnlen = MAXPATHLEN;
299 bytes_copied = 0;
300
301 goto retry_copy;
302 } else if (error == ENAMETOOLONG && (cnp->cn_flags & HASBUF) &&
303 (cnp->cn_pnlen * 2) <= MAXLONGPATHLEN && proc_support_long_paths(p)) {
304 /* First time we arrive here, the buffer came from namei_alloc */
305 namei_free(cnp->cn_pnbuf, cnp->cn_pnlen);
306
307 resolve_error = 0;
308
309 cnp->cn_pnlen *= 2;
310 cnp->cn_pnbuf = namei_alloc(cnp->cn_pnlen);
311 bytes_copied = 0;
312
313 goto retry_copy;
314 }
315 if (error) {
316 goto error_out;
317 } else if (resolve_error) {
318 error = resolve_error;
319 goto error_out;
320 }
321 assert(bytes_copied <= cnp->cn_pnlen);
322 ndp->ni_pathlen = (u_int)bytes_copied;
323 bytes_copied = 0;
324
325 if (!(resolve_flags & RESOLVE_CHECKED)) {
326 assert(!(cnp->cn_flags & HASBUF) && (cnp->cn_pnlen == PATHBUFLEN));
327 error = lookup_check_for_resolve_prefix(cnp->cn_pnbuf, cnp->cn_pnlen, ndp->ni_pathlen,
328 &resolve_flags, &resolve_prefix_len);
329 if (error) {
330 goto error_out;
331 }
332 if (resolve_prefix_len) {
333 /*
334 * Since this is pointing to the static path buffer instead of a zalloc'ed memorry,
335 * we're not going to attempt to free this, so it is perfectly fine to change the
336 * value of cnp->cn_pnbuf.
337 */
338 cnp->cn_pnbuf += resolve_prefix_len;
339 cnp->cn_pnlen -= resolve_prefix_len;
340 ndp->ni_pathlen -= resolve_prefix_len;
341 resolve_prefix_len = 0;
342
343 /* Update ndp with the resolve flags */
344 if (resolve_flags & RESOLVE_NODOTDOT) {
345 ndp->ni_flag |= NAMEI_NODOTDOT;
346 }
347 if (resolve_flags & RESOLVE_LOCAL) {
348 ndp->ni_flag |= NAMEI_LOCAL;
349 }
350 if (resolve_flags & RESOLVE_NODEVFS) {
351 ndp->ni_flag |= NAMEI_NODEVFS;
352 }
353 if (resolve_flags & RESOLVE_IMMOVABLE) {
354 ndp->ni_flag |= NAMEI_IMMOVABLE;
355 }
356 if (resolve_flags & RESOLVE_UNIQUE) {
357 ndp->ni_flag |= NAMEI_UNIQUE;
358 }
359 if (resolve_flags & RESOLVE_NOXATTRS) {
360 ndp->ni_flag |= NAMEI_NOXATTRS;
361 }
362 }
363 }
364
365 /* At this point we should have stripped off the prefix from the path that has to be looked up */
366 assert((resolve_flags & RESOLVE_CHECKED) && (resolve_prefix_len == 0));
367
368 /*
369 * Since the name cache may contain positive entries of
370 * the incorrect case, force lookup() to bypass the cache
371 * and call directly into the filesystem for each path
372 * component. Note: the FS may still consult the cache,
373 * but can apply rules to validate the results.
374 */
375 if (proc_is_forcing_hfs_case_sensitivity(p)) {
376 cnp->cn_flags |= CN_SKIPNAMECACHE;
377 }
378
379 #if CONFIG_VOLFS
380 /*
381 * Check for legacy volfs style pathnames.
382 *
383 * For compatibility reasons we currently allow these paths,
384 * but future versions of the OS may not support them.
385 */
386 if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
387 cnp->cn_pnbuf[0] == '/' &&
388 cnp->cn_pnbuf[1] == '.' &&
389 cnp->cn_pnbuf[2] == 'v' &&
390 cnp->cn_pnbuf[3] == 'o' &&
391 cnp->cn_pnbuf[4] == 'l' &&
392 cnp->cn_pnbuf[5] == '/') {
393 char * realpath;
394 size_t realpathlen;
395 int realpath_err;
396 vnode_t rdvp = NULLVP;
397 /* Attempt to resolve a legacy volfs style pathname. */
398
399 realpathlen = MAXPATHLEN;
400 do {
401 /*
402 * To be consistent with the behavior of openbyid_np, which always supports
403 * long paths, do not gate our support on proc_support_long_paths either.
404 */
405 realpath = namei_alloc(realpathlen);
406
407 if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
408 proc_dirs_lock_shared(p);
409 if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
410 rdvp = p->p_fd.fd_rdir;
411 if (vnode_get(rdvp)) {
412 rdvp = NULLVP;
413 }
414 }
415 proc_dirs_unlock_shared(p);
416 }
417 /*
418 * We only error out on the ENAMETOOLONG cases where we know that
419 * vfs_getrealpath translation succeeded but the path could not fit into
420 * realpathlen characters. In other failure cases, we may be dealing with a path
421 * that legitimately looks like /.vol/1234/567 and is not meant to be translated
422 */
423 realpath_err = vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, realpathlen, ctx, rdvp);
424
425 if (rdvp) {
426 vnode_put(rdvp);
427 rdvp = NULLVP;
428 }
429 if (realpath_err) {
430 namei_free(realpath, realpathlen);
431 if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG) {
432 error = ENAMETOOLONG;
433 }
434 } else {
435 size_t tmp_len;
436 if (cnp->cn_flags & HASBUF) {
437 namei_free(cnp->cn_pnbuf, cnp->cn_pnlen);
438 }
439 cnp->cn_pnbuf = realpath;
440 cnp->cn_pnlen = (int)realpathlen;
441 tmp_len = strlen(realpath) + 1;
442 assert(tmp_len <= UINT_MAX);
443 ndp->ni_pathlen = (u_int)tmp_len;
444 cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
445 error = 0;
446 }
447 } while (error == ENAMETOOLONG && (realpathlen *= 2) && realpathlen <= MAXLONGPATHLEN);
448
449 if (error) {
450 goto error_out;
451 }
452 }
453 #endif /* CONFIG_VOLFS */
454
455 #if CONFIG_AUDIT
456 /* If we are auditing the kernel pathname, save the user pathname */
457 if (cnp->cn_flags & AUDITVNPATH1) {
458 AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1);
459 }
460 if (cnp->cn_flags & AUDITVNPATH2) {
461 AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2);
462 }
463 #endif /* CONFIG_AUDIT */
464
465 /*
466 * Do not allow empty pathnames
467 */
468 if (*cnp->cn_pnbuf == '\0') {
469 error = ENOENT;
470 goto error_out;
471 }
472 if (ndp->ni_flag & NAMEI_NOFOLLOW_ANY || (resolve_flags & RESOLVE_NOFOLLOW_ANY)) {
473 ndp->ni_loopcnt = MAXSYMLINKS;
474 } else {
475 ndp->ni_loopcnt = 0;
476 }
477
478 /*
479 * determine the starting point for the translation.
480 */
481 proc_dirs_lock_shared(p);
482 lck_rw_lock_shared(&rootvnode_rw_lock);
483
484 if (!(ndp->ni_flag & NAMEI_ROOTDIR)) {
485 if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
486 ndp->ni_rootdir = p->p_fd.fd_rdir;
487 } else {
488 ndp->ni_rootdir = rootvnode;
489 }
490 }
491
492 if (!ndp->ni_rootdir) {
493 if (ndp->ni_flag & NAMEI_ROOTDIR) {
494 panic("NAMEI_ROOTDIR is set but ni_rootdir is not\n");
495 } else if (fdt_flag_test(&p->p_fd, FD_CHROOT)) {
496 /* This should be a panic */
497 printf("p->p_fd.fd_rdir is not set\n");
498 } else {
499 printf("rootvnode is not set\n");
500 }
501 lck_rw_unlock_shared(&rootvnode_rw_lock);
502 proc_dirs_unlock_shared(p);
503 error = ENOENT;
504 goto error_out;
505 }
506
507 cnp->cn_nameptr = cnp->cn_pnbuf;
508
509 ndp->ni_usedvp = NULLVP;
510
511 if (*(cnp->cn_nameptr) == '/') {
512 while (*(cnp->cn_nameptr) == '/') {
513 cnp->cn_nameptr++;
514 ndp->ni_pathlen--;
515 }
516 if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
517 /* Absolute paths are never allowed in NAMEI_RESOLVE_BENEATH */
518 lck_rw_unlock_shared(&rootvnode_rw_lock);
519 proc_dirs_unlock_shared(p);
520 error = ENOTCAPABLE;
521 goto error_out;
522 }
523 dp = ndp->ni_rootdir;
524 } else if (cnp->cn_flags & USEDVP) {
525 dp = ndp->ni_dvp;
526 ndp->ni_usedvp = dp;
527 usedvp_dp = dp;
528 } else {
529 dp = vfs_context_cwd(ctx);
530 if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
531 /* Store the starting directory because it can change after a symlink traversal */
532 ndp->ni_usedvp = dp;
533 clear_usedvp = true;
534 }
535 }
536
537 if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
538 dp = NULLVP;
539 lck_rw_unlock_shared(&rootvnode_rw_lock);
540 proc_dirs_unlock_shared(p);
541 error = ENOENT;
542 goto error_out;
543 }
544
545 /*
546 * We need our own usecount on the root vnode and the starting dir across
547 * the lookup. There's two things that be done here. We can hold the locks
548 * (which protect the existing usecounts on the directories) across the
549 * lookup or take our own usecount. Holding the locks across the lookup can
550 * cause deadlock issues if we re-enter namei on the same thread so the
551 * correct thing to do is to acquire our own usecount.
552 *
553 * Ideally, the usecount should be obtained by vnode_get->vnode_ref->vnode_put.
554 * However when this vnode is the rootvnode, that sequence will produce a
555 * lot of vnode mutex locks and unlocks on a single vnode (the rootvnode)
556 * and will be highly contended and degrade performance. Since we have
557 * an existing usecount protected by the locks we hold, we'll just use
558 * an atomic op to increment the usecount on a vnode which already has one
559 * and can't be released because we have the locks which protect against that
560 * happening.
561 */
562 rootdir_with_usecount = ndp->ni_rootdir;
563 old_count = os_atomic_inc_orig(&rootdir_with_usecount->v_usecount, relaxed);
564 if (old_count < 1) {
565 panic("(1) invalid pre-increment usecount (%d) for rootdir vnode %p",
566 old_count, rootdir_with_usecount);
567 } else if (old_count == INT32_MAX) {
568 panic("(1) usecount overflow for vnode %p", rootdir_with_usecount);
569 }
570
571 if ((dp != rootdir_with_usecount) && (dp != usedvp_dp)) {
572 old_count = os_atomic_inc_orig(&dp->v_usecount, relaxed);
573 if (old_count < 1) {
574 panic("(2) invalid pre-increment usecount (%d) for vnode %p", old_count, dp);
575 } else if (old_count == INT32_MAX) {
576 panic("(2) usecount overflow for vnode %p", dp);
577 }
578 startdir_with_usecount = dp;
579 }
580
581 /* Now that we have our usecount, release the locks */
582 lck_rw_unlock_shared(&rootvnode_rw_lock);
583 proc_dirs_unlock_shared(p);
584
585 ndp->ni_dvp = NULLVP;
586 ndp->ni_vp = NULLVP;
587
588 for (;;) {
589 #if CONFIG_MACF
590 /*
591 * Give MACF policies a chance to reject the lookup
592 * before performing any filesystem operations.
593 * This hook is called before resolving the path and
594 * again each time a symlink is encountered.
595 * NB: policies receive path information as supplied
596 * by the caller and thus cannot be trusted.
597 */
598 error = mac_vnode_check_lookup_preflight(ctx, dp, cnp->cn_nameptr, cnp->cn_namelen);
599 if (error) {
600 goto error_out;
601 }
602 #endif
603 ndp->ni_startdir = dp;
604 dp = NULLVP;
605
606 if ((error = lookup(ndp))) {
607 goto error_out;
608 }
609
610 /*
611 * Check for symbolic link
612 */
613 if ((cnp->cn_flags & ISSYMLINK) == 0) {
614 if ((ndp->ni_flag & NAMEI_UNIQUE) && ndp->ni_vp && vnode_hasmultipath(ndp->ni_vp)) {
615 error = ENOTCAPABLE;
616 goto out_drop;
617 }
618 if (startdir_with_usecount) {
619 vnode_rele(startdir_with_usecount);
620 startdir_with_usecount = NULLVP;
621 }
622 if (rootdir_with_usecount) {
623 lck_rw_lock_shared(&rootvnode_rw_lock);
624 if (rootdir_with_usecount == rootvnode) {
625 old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
626 if (old_count < 2) {
627 /*
628 * There needs to have been at least 1 usecount left on the rootvnode
629 */
630 panic("(3) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
631 old_count, rootdir_with_usecount);
632 }
633 rootdir_with_usecount = NULLVP;
634 }
635 lck_rw_unlock_shared(&rootvnode_rw_lock);
636 if (rootdir_with_usecount) {
637 vnode_rele(rootdir_with_usecount);
638 rootdir_with_usecount = NULLVP;
639 }
640 }
641
642 return 0;
643 }
644
645 continue_symlink:
646 /* Gives us a new path to process, and a starting dir */
647 error = lookup_handle_symlink(ndp, &dp, &dp_has_iocount, ctx);
648 if (error != 0) {
649 break;
650 }
651 if (dp_has_iocount) {
652 if ((dp != rootdir_with_usecount) && (dp != startdir_with_usecount) &&
653 (dp != usedvp_dp)) {
654 if (startdir_with_usecount) {
655 vnode_rele(startdir_with_usecount);
656 }
657 vnode_ref_ext(dp, 0, VNODE_REF_FORCE);
658 startdir_with_usecount = dp;
659 }
660 vnode_put(dp);
661 dp_has_iocount = false;
662 }
663 }
664 /*
665 * only come here if we fail to handle a SYMLINK...
666 * if either ni_dvp or ni_vp is non-NULL, then
667 * we need to drop the iocount that was picked
668 * up in the lookup routine
669 */
670 out_drop:
671 if (ndp->ni_dvp) {
672 vnode_put(ndp->ni_dvp);
673 }
674 if (ndp->ni_vp) {
675 vnode_put(ndp->ni_vp);
676 }
677 error_out:
678 if (clear_usedvp) {
679 ndp->ni_usedvp = NULLVP;
680 }
681 if (startdir_with_usecount) {
682 vnode_rele(startdir_with_usecount);
683 startdir_with_usecount = NULLVP;
684 }
685 if (rootdir_with_usecount) {
686 lck_rw_lock_shared(&rootvnode_rw_lock);
687 if (rootdir_with_usecount == rootvnode) {
688 old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
689 if (old_count < 2) {
690 /*
691 * There needs to have been at least 1 usecount left on the rootvnode
692 */
693 panic("(4) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
694 old_count, rootdir_with_usecount);
695 }
696 lck_rw_unlock_shared(&rootvnode_rw_lock);
697 } else {
698 lck_rw_unlock_shared(&rootvnode_rw_lock);
699 vnode_rele(rootdir_with_usecount);
700 }
701 rootdir_with_usecount = NULLVP;
702 }
703
704 if ((cnp->cn_flags & HASBUF)) {
705 cnp->cn_flags &= ~HASBUF;
706 namei_free(cnp->cn_pnbuf, cnp->cn_pnlen);
707 }
708 cnp->cn_pnbuf = NULL;
709 ndp->ni_vp = NULLVP;
710 ndp->ni_dvp = NULLVP;
711
712 #if CONFIG_VOLFS
713 /*
714 * Deal with volfs fallout.
715 *
716 * At this point, if we were originally given a volfs path that
717 * looks like /.vol/123/456, then we would have had to convert it into
718 * a full path. Assuming that part worked properly, we will now attempt
719 * to conduct a lookup of the item in the namespace. Under normal
720 * circumstances, if a user looked up /tmp/foo and it was not there, it
721 * would be permissible to return ENOENT.
722 *
723 * However, we may not want to do that here. Specifically, the volfs path
724 * uniquely identifies a certain item in the namespace regardless of where it
725 * lives. If the item has moved in between the time we constructed the
726 * path and now, when we're trying to do a lookup/authorization on the full
727 * path, we may have gotten an ENOENT.
728 *
729 * At this point we can no longer tell if the path no longer exists
730 * or if the item in question no longer exists. It could have been renamed
731 * away, in which case the /.vol identifier is still valid.
732 *
733 * Do this dance a maximum of MAX_VOLFS_RESTARTS times.
734 */
735 if ((error == ENOENT) && (ndp->ni_cnd.cn_flags & CN_VOLFSPATH)) {
736 if (volfs_restarts < MAX_VOLFS_RESTARTS) {
737 volfs_restarts++;
738 goto vnode_recycled;
739 }
740 }
741 #endif
742
743 if (error == ERECYCLE) {
744 /* vnode was recycled underneath us. re-drive lookup to start at
745 * the beginning again, since recycling invalidated last lookup*/
746 ndp->ni_cnd.cn_flags = cnpflags;
747 ndp->ni_dvp = usedvp;
748 goto vnode_recycled;
749 }
750
751
752 return error;
753 }
754
755 int
namei_compound_available(vnode_t dp,struct nameidata * ndp)756 namei_compound_available(vnode_t dp, struct nameidata *ndp)
757 {
758 if ((ndp->ni_flag & NAMEI_COMPOUNDOPEN) != 0) {
759 return vnode_compound_open_available(dp);
760 }
761
762 return 0;
763 }
764
765 int
lookup_check_for_resolve_prefix(char * path,size_t pathbuflen,size_t len,uint32_t * resolve_flags,size_t * prefix_len)766 lookup_check_for_resolve_prefix(char *path, size_t pathbuflen, size_t len, uint32_t *resolve_flags, size_t *prefix_len)
767 {
768 int error = 0;
769 *resolve_flags = (uint32_t)RESOLVE_CHECKED;
770 *prefix_len = 0;
771
772 if (len < (sizeof("/.nofollow/") - 1) || path[0] != '/' || path[1] != '.') {
773 return 0;
774 }
775
776 if ((strncmp(&path[2], "nofollow/", (sizeof("nofollow/") - 1)) == 0)) {
777 *resolve_flags |= RESOLVE_NOFOLLOW_ANY;
778 *prefix_len = sizeof("/.nofollow") - 1;
779 } else if ((len >= sizeof("/.resolve/1/") - 1) &&
780 strncmp(&path[2], "resolve/", (sizeof("resolve/") - 1)) == 0) {
781 char * flag = path + (sizeof("/.resolve/") - 1);
782 char *next = flag;
783 char last_char = path[pathbuflen - 1];
784
785 /* no leading zeroes or non digits */
786 if ((flag[0] == '0' && flag[1] != '/') ||
787 flag[0] < '0' || flag[0] > '9') {
788 error = EINVAL;
789 goto out;
790 }
791
792 path[pathbuflen - 1] = '\0';
793 unsigned long flag_val = strtoul(flag, &next, 10);
794 path[pathbuflen - 1] = last_char;
795 if (next[0] != '/' || (flag_val & ~RESOLVE_VALIDMASK)) {
796 error = EINVAL;
797 goto out;
798 }
799 assert(next >= flag);
800 *resolve_flags |= (uint32_t)flag_val;
801 *prefix_len = (size_t)(next - path);
802 }
803 out:
804 assert(*prefix_len <= sizeof("/.resolve/2147483647"));
805 return error;
806 }
807
808 static int
lookup_authorize_search(vnode_t dp,struct componentname * cnp,int dp_authorized_in_cache,vfs_context_t ctx)809 lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx)
810 {
811 #if !CONFIG_MACF
812 #pragma unused(cnp)
813 #endif
814
815 int error;
816
817 if (!dp_authorized_in_cache) {
818 error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx);
819 if (error) {
820 return error;
821 }
822 }
823 #if CONFIG_MACF
824 error = mac_vnode_check_lookup(ctx, dp, cnp);
825 if (error) {
826 return error;
827 }
828 #endif /* CONFIG_MACF */
829
830 return 0;
831 }
832
833 static void
lookup_consider_update_cache(vnode_t dvp,vnode_t vp,struct componentname * cnp,int nc_generation)834 lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation)
835 {
836 int isdot_or_dotdot;
837 isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT);
838
839 if (vp->v_name == NULL || vp->v_parent == NULLVP) {
840 int update_flags = 0;
841
842 if (isdot_or_dotdot == 0) {
843 if (vp->v_name == NULL) {
844 update_flags |= VNODE_UPDATE_NAME;
845 }
846 if (dvp != NULLVP && vp->v_parent == NULLVP) {
847 update_flags |= VNODE_UPDATE_PARENT;
848 }
849
850 if (update_flags) {
851 vnode_update_identity(vp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags);
852 }
853 }
854 }
855 if ((cnp->cn_flags & MAKEENTRY) && (vp->v_flag & VNCACHEABLE) && LIST_FIRST(&vp->v_nclinks) == NULL) {
856 /*
857 * missing from name cache, but should
858 * be in it... this can happen if volfs
859 * causes the vnode to be created or the
860 * name cache entry got recycled but the
861 * vnode didn't...
862 * check to make sure that ni_dvp is valid
863 * cache_lookup_path may return a NULL
864 * do a quick check to see if the generation of the
865 * directory matches our snapshot... this will get
866 * rechecked behind the name cache lock, but if it
867 * already fails to match, no need to go any further
868 */
869 if (dvp != NULLVP && (nc_generation == dvp->v_nc_generation) && (!isdot_or_dotdot)) {
870 cache_enter_with_gen(dvp, vp, cnp, nc_generation);
871 }
872 }
873 }
874
875 #if NAMEDRSRCFORK
876 /*
877 * Can change ni_dvp and ni_vp. On success, returns with iocounts on stream vnode (always) and
878 * data fork if requested. On failure, returns with iocount data fork (always) and its parent directory
879 * (if one was provided).
880 */
881 static int
lookup_handle_rsrc_fork(vnode_t dp,struct nameidata * ndp,struct componentname * cnp,int wantparent,vfs_context_t ctx)882 lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx)
883 {
884 vnode_t svp = NULLVP;
885 enum nsoperation nsop;
886 int nsflags;
887 int error;
888
889 if (dp->v_type != VREG) {
890 error = ENOENT;
891 goto out;
892 }
893 switch (cnp->cn_nameiop) {
894 case DELETE:
895 if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
896 nsop = NS_DELETE;
897 } else {
898 error = EPERM;
899 goto out;
900 }
901 break;
902 case CREATE:
903 if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
904 nsop = NS_CREATE;
905 } else {
906 error = EPERM;
907 goto out;
908 }
909 break;
910 case LOOKUP:
911 /* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
912 if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
913 nsop = NS_OPEN;
914 } else {
915 error = EPERM;
916 goto out;
917 }
918 break;
919 default:
920 error = EPERM;
921 goto out;
922 }
923
924 nsflags = 0;
925 if (cnp->cn_flags & CN_RAW_ENCRYPTED) {
926 nsflags |= NS_GETRAWENCRYPTED;
927 }
928
929 /* Ask the file system for the resource fork. */
930 error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, nsflags, ctx);
931
932 /* During a create, it OK for stream vnode to be missing. */
933 if (error == ENOATTR || error == ENOENT) {
934 error = (nsop == NS_CREATE) ? 0 : ENOENT;
935 }
936 if (error) {
937 goto out;
938 }
939 /* The "parent" of the stream is the file. */
940 if (wantparent) {
941 if (ndp->ni_dvp) {
942 vnode_put(ndp->ni_dvp);
943 }
944 ndp->ni_dvp = dp;
945 } else {
946 vnode_put(dp);
947 }
948 ndp->ni_vp = svp; /* on create this may be null */
949
950 /* Restore the truncated pathname buffer (for audits). */
951 if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') {
952 /*
953 * While we replaced only '/' with '\0' and would ordinarily
954 * need to just switch that back, the buffer in which we did
955 * this may not be what the pathname buffer is now when symlinks
956 * are involved. If we just restore the "/" we will make the
957 * string not terminated anymore, so be safe and restore the
958 * entire suffix.
959 */
960 strncpy(ndp->ni_next, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC));
961 cnp->cn_nameptr = ndp->ni_next + 1;
962 cnp->cn_namelen = sizeof(_PATH_RSRCFORKSPEC) - 1;
963 ndp->ni_next += cnp->cn_namelen;
964 if (ndp->ni_next[0] != '\0') {
965 panic("Incorrect termination of path in %s", __FUNCTION__);
966 }
967 }
968 cnp->cn_flags &= ~MAKEENTRY;
969
970 return 0;
971 out:
972 return error;
973 }
974 #endif /* NAMEDRSRCFORK */
975
976 /*
977 * iocounts in:
978 * --One on ni_vp. One on ni_dvp if there is more path, or we didn't come through the
979 * cache, or we came through the cache and the caller doesn't want the parent.
980 *
981 * iocounts out:
982 * --Leaves us in the correct state for the next step, whatever that might be.
983 * --If we find a symlink, returns with iocounts on both ni_vp and ni_dvp.
984 * --If we are to look up another component, then we have an iocount on ni_vp and
985 * nothing else.
986 * --If we are done, returns an iocount on ni_vp, and possibly on ni_dvp depending on nameidata flags.
987 * --In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount
988 * was dropped).
989 */
990 static int
lookup_handle_found_vnode(struct nameidata * ndp,struct componentname * cnp,int rdonly,int vbusyflags,int * keep_going,int nc_generation,int wantparent,int atroot,vfs_context_t ctx)991 lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
992 int vbusyflags, int *keep_going, int nc_generation,
993 int wantparent, int atroot, vfs_context_t ctx)
994 {
995 vnode_t dp;
996 int error;
997 char *cp;
998
999 dp = ndp->ni_vp;
1000 *keep_going = 0;
1001
1002 if (ndp->ni_vp == NULLVP) {
1003 panic("NULL ni_vp in %s", __FUNCTION__);
1004 }
1005
1006 if (atroot) {
1007 goto nextname;
1008 }
1009
1010 /*
1011 * Take into account any additional components consumed by
1012 * the underlying filesystem.
1013 */
1014 if (cnp->cn_consume > 0) {
1015 cnp->cn_nameptr += cnp->cn_consume;
1016 ndp->ni_next += cnp->cn_consume;
1017 ndp->ni_pathlen -= cnp->cn_consume;
1018 cnp->cn_consume = 0;
1019 } else {
1020 lookup_consider_update_cache(ndp->ni_dvp, dp, cnp, nc_generation);
1021 }
1022
1023 /*
1024 * Check to see if the vnode has been mounted on...
1025 * if so find the root of the mounted file system.
1026 * Updates ndp->ni_vp.
1027 */
1028 error = lookup_traverse_mountpoints(ndp, cnp, dp, vbusyflags, ctx);
1029 dp = ndp->ni_vp;
1030 if (error) {
1031 goto out;
1032 }
1033
1034 #if CONFIG_MACF
1035 if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) {
1036 error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx);
1037 if (error) {
1038 goto out;
1039 }
1040 }
1041 #endif
1042
1043 /*
1044 * Check for symbolic link
1045 */
1046 if ((dp->v_type == VLNK) &&
1047 ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
1048 cnp->cn_flags |= ISSYMLINK;
1049 *keep_going = 1;
1050 return 0;
1051 }
1052
1053 /*
1054 * Check for bogus trailing slashes.
1055 */
1056 if ((ndp->ni_flag & NAMEI_TRAILINGSLASH)) {
1057 if (dp->v_type != VDIR) {
1058 #if CONFIG_MACF
1059 /*
1060 * Prevent the information disclosure on the vnode
1061 */
1062 if (mac_vnode_check_stat(ctx, NOCRED, dp) == EPERM) {
1063 error = EPERM;
1064 goto out;
1065 }
1066 #endif /* CONFIG_MACF */
1067 error = ENOTDIR;
1068 goto out;
1069 }
1070 ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
1071 }
1072
1073 #if NAMEDSTREAMS
1074 /*
1075 * Deny namei/lookup requests to resolve paths that point to shadow files.
1076 * Access to shadow files must be conducted by explicit calls to VNOP_LOOKUP
1077 * directly, and not use lookup/namei
1078 */
1079 if (vnode_isshadow(dp)) {
1080 error = ENOENT;
1081 goto out;
1082 }
1083 #endif
1084
1085 nextname:
1086 /*
1087 * Not a symbolic link. If more pathname,
1088 * continue at next component, else return.
1089 *
1090 * Definitely have a dvp if there's another slash
1091 */
1092 if (*ndp->ni_next == '/') {
1093 cnp->cn_nameptr = ndp->ni_next + 1;
1094 ndp->ni_pathlen--;
1095 while (*cnp->cn_nameptr == '/') {
1096 cnp->cn_nameptr++;
1097 ndp->ni_pathlen--;
1098 }
1099
1100 cp = cnp->cn_nameptr;
1101 vnode_put(ndp->ni_dvp);
1102 ndp->ni_dvp = NULLVP;
1103
1104 if (*cp == '\0') {
1105 goto emptyname;
1106 }
1107
1108 *keep_going = 1;
1109 return 0;
1110 }
1111
1112 /*
1113 * Disallow directory write attempts on read-only file systems.
1114 */
1115 if (rdonly &&
1116 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
1117 error = EROFS;
1118 goto out;
1119 }
1120
1121 /* If SAVESTART is set, we should have a dvp */
1122 if (cnp->cn_flags & SAVESTART) {
1123 /*
1124 * note that we already hold a reference
1125 * on both dp and ni_dvp, but for some reason
1126 * can't get another one... in this case we
1127 * need to do vnode_put on dp in 'bad2'
1128 */
1129 if ((vnode_get(ndp->ni_dvp))) {
1130 error = ENOENT;
1131 goto out;
1132 }
1133 ndp->ni_startdir = ndp->ni_dvp;
1134 }
1135 if (!wantparent && ndp->ni_dvp) {
1136 vnode_put(ndp->ni_dvp);
1137 ndp->ni_dvp = NULLVP;
1138 }
1139
1140 if (cnp->cn_flags & AUDITVNPATH1) {
1141 AUDIT_ARG(vnpath, dp, ARG_VNODE1);
1142 } else if (cnp->cn_flags & AUDITVNPATH2) {
1143 AUDIT_ARG(vnpath, dp, ARG_VNODE2);
1144 }
1145
1146 #if NAMEDRSRCFORK
1147 /*
1148 * Caller wants the resource fork.
1149 */
1150 if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) {
1151 error = lookup_handle_rsrc_fork(dp, ndp, cnp, wantparent, ctx);
1152 if (error != 0) {
1153 goto out;
1154 }
1155
1156 dp = ndp->ni_vp;
1157 }
1158 #endif
1159 if (kdebug_enable) {
1160 kdebug_lookup(ndp->ni_vp, cnp);
1161 }
1162
1163 return 0;
1164
1165 emptyname:
1166 error = lookup_handle_emptyname(ndp, cnp, wantparent);
1167 if (error != 0) {
1168 goto out;
1169 }
1170
1171 return 0;
1172 out:
1173 return error;
1174 }
1175
1176 /*
1177 * Comes in iocount on ni_vp. May overwrite ni_dvp, but doesn't interpret incoming value.
1178 */
1179 static int
lookup_handle_emptyname(struct nameidata * ndp,struct componentname * cnp,int wantparent)1180 lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent)
1181 {
1182 vnode_t dp;
1183 int error = 0;
1184
1185 dp = ndp->ni_vp;
1186 cnp->cn_namelen = 0;
1187 /*
1188 * A degenerate name (e.g. / or "") which is a way of
1189 * talking about a directory, e.g. like "/." or ".".
1190 */
1191 if (dp->v_type != VDIR) {
1192 error = ENOTDIR;
1193 goto out;
1194 }
1195 if (cnp->cn_nameiop == CREATE && dp == rootvnode) {
1196 error = EEXIST;
1197 goto out;
1198 }
1199 if (cnp->cn_nameiop != LOOKUP) {
1200 error = EISDIR;
1201 goto out;
1202 }
1203 if (wantparent) {
1204 /*
1205 * note that we already hold a reference
1206 * on dp, but for some reason can't
1207 * get another one... in this case we
1208 * need to do vnode_put on dp in 'bad'
1209 */
1210 if ((vnode_get(dp))) {
1211 error = ENOENT;
1212 goto out;
1213 }
1214 ndp->ni_dvp = dp;
1215 }
1216 cnp->cn_flags &= ~ISDOTDOT;
1217 cnp->cn_flags |= ISLASTCN;
1218 ndp->ni_next = cnp->cn_nameptr;
1219 ndp->ni_vp = dp;
1220
1221 if (cnp->cn_flags & AUDITVNPATH1) {
1222 AUDIT_ARG(vnpath, dp, ARG_VNODE1);
1223 } else if (cnp->cn_flags & AUDITVNPATH2) {
1224 AUDIT_ARG(vnpath, dp, ARG_VNODE2);
1225 }
1226 if (cnp->cn_flags & SAVESTART) {
1227 panic("lookup: SAVESTART");
1228 }
1229
1230 return 0;
1231 out:
1232 return error;
1233 }
1234 /*
1235 * Search a pathname.
1236 * This is a very central and rather complicated routine.
1237 *
1238 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
1239 * The starting directory is taken from ni_startdir. The pathname is
1240 * descended until done, or a symbolic link is encountered. The variable
1241 * ni_more is clear if the path is completed; it is set to one if a
1242 * symbolic link needing interpretation is encountered.
1243 *
1244 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
1245 * whether the name is to be looked up, created, renamed, or deleted.
1246 * When CREATE, RENAME, or DELETE is specified, information usable in
1247 * creating, renaming, or deleting a directory entry may be calculated.
1248 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
1249 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
1250 * returned unlocked. Otherwise the parent directory is not returned. If
1251 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
1252 * the target is returned locked, otherwise it is returned unlocked.
1253 * When creating or renaming and LOCKPARENT is specified, the target may not
1254 * be ".". When deleting and LOCKPARENT is specified, the target may be ".".
1255 *
1256 * Overall outline of lookup:
1257 *
1258 * dirloop:
1259 * identify next component of name at ndp->ni_ptr
1260 * handle degenerate case where name is null string
1261 * if .. and crossing mount points and on mounted filesys, find parent
1262 * call VNOP_LOOKUP routine for next component name
1263 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
1264 * component vnode returned in ni_vp (if it exists), locked.
1265 * if result vnode is mounted on and crossing mount points,
1266 * find mounted on vnode
1267 * if more components of name, do next level at dirloop
1268 * return the answer in ni_vp, locked if LOCKLEAF set
1269 * if LOCKPARENT set, return locked parent in ni_dvp
1270 * if WANTPARENT set, return unlocked parent in ni_dvp
1271 *
1272 * Returns: 0 Success
1273 * ENOENT No such file or directory
1274 * EBADF Bad file descriptor
1275 * ENOTDIR Not a directory
1276 * EROFS Read-only file system [CREATE]
1277 * EISDIR Is a directory [CREATE]
1278 * cache_lookup_path:ERECYCLE (vnode was recycled from underneath us, redrive lookup again)
1279 * vnode_authorize:EROFS
1280 * vnode_authorize:EACCES
1281 * vnode_authorize:EPERM
1282 * vnode_authorize:???
1283 * VNOP_LOOKUP:ENOENT No such file or directory
1284 * VNOP_LOOKUP:EJUSTRETURN Restart system call (INTERNAL)
1285 * VNOP_LOOKUP:???
1286 * VFS_ROOT:ENOTSUP
1287 * VFS_ROOT:ENOENT
1288 * VFS_ROOT:???
1289 */
1290 int
lookup(struct nameidata * ndp)1291 lookup(struct nameidata *ndp)
1292 {
1293 char *cp; /* pointer into pathname argument */
1294 vnode_t tdp; /* saved dp */
1295 vnode_t dp; /* the directory we are searching */
1296 int docache = 1; /* == 0 do not cache last component */
1297 int wantparent; /* 1 => wantparent or lockparent flag */
1298 int rdonly; /* lookup read-only flag bit */
1299 int dp_authorized = 0;
1300 int error = 0;
1301 struct componentname *cnp = &ndp->ni_cnd;
1302 vfs_context_t ctx = cnp->cn_context;
1303 int vbusyflags = 0;
1304 int nc_generation = 0;
1305 vnode_t last_dp = NULLVP;
1306 int keep_going;
1307 int atroot;
1308
1309 /*
1310 * Setup: break out flag bits into variables.
1311 */
1312 if (cnp->cn_flags & NOCACHE) {
1313 docache = 0;
1314 }
1315 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
1316 rdonly = cnp->cn_flags & RDONLY;
1317 cnp->cn_flags &= ~ISSYMLINK;
1318 cnp->cn_consume = 0;
1319
1320 dp = ndp->ni_startdir;
1321 ndp->ni_startdir = NULLVP;
1322
1323 if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) {
1324 vbusyflags = LK_NOWAIT;
1325 }
1326 cp = cnp->cn_nameptr;
1327
1328 if (*cp == '\0') {
1329 if ((vnode_getwithref(dp))) {
1330 dp = NULLVP;
1331 error = ENOENT;
1332 goto bad;
1333 }
1334 ndp->ni_vp = dp;
1335 error = lookup_handle_emptyname(ndp, cnp, wantparent);
1336 if (error) {
1337 goto bad;
1338 }
1339
1340 return 0;
1341 }
1342 dirloop:
1343 atroot = 0;
1344 ndp->ni_vp = NULLVP;
1345
1346 if ((error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp))) {
1347 dp = NULLVP;
1348 goto bad;
1349 }
1350 if ((cnp->cn_flags & ISLASTCN)) {
1351 if (docache) {
1352 cnp->cn_flags |= MAKEENTRY;
1353 }
1354 } else {
1355 cnp->cn_flags |= MAKEENTRY;
1356 }
1357
1358 dp = ndp->ni_dvp;
1359
1360 if (ndp->ni_vp != NULLVP) {
1361 /*
1362 * cache_lookup_path returned a non-NULL ni_vp then,
1363 * we're guaranteed that the dp is a VDIR, it's
1364 * been authorized, and vp is not ".."
1365 *
1366 * make sure we don't try to enter the name back into
1367 * the cache if this vp is purged before we get to that
1368 * check since we won't have serialized behind whatever
1369 * activity is occurring in the FS that caused the purge
1370 */
1371 if (dp != NULLVP) {
1372 nc_generation = dp->v_nc_generation - 1;
1373 }
1374
1375 goto returned_from_lookup_path;
1376 }
1377
1378 #if NAMEDRSRCFORK
1379 /* return ENOTCAPABLE if path lookup on named streams is prohibited. */
1380 if ((ndp->ni_flag & NAMEI_NOXATTRS) &&
1381 (ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
1382 (ndp->ni_next[1] == '.' && ndp->ni_next[2] == '.') &&
1383 bcmp(ndp->ni_next, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
1384 error = ENOTCAPABLE;
1385 goto bad;
1386 }
1387 #endif /* NAMEDRSRCFORK */
1388
1389 /*
1390 * Handle "..": three special cases.
1391 * 1. if at starting directory (e.g. the cwd/usedvp)
1392 * and RESOLVE_BENEATH, then return ENOTCAPABLE.
1393 * 2. If at root directory (e.g. after chroot)
1394 * or at absolute root directory
1395 * then ignore it so can't get out.
1396 * 3. If this vnode is the root of a mounted
1397 * filesystem, then replace it with the
1398 * vnode which was mounted on so we take the
1399 * .. in the other file system.
1400 */
1401 if ((cnp->cn_flags & ISDOTDOT)) {
1402 /* if dp is the starting directory and RESOLVE_BENEATH, we should return ENOTCAPABLE */
1403 if ((ndp->ni_flag & NAMEI_RESOLVE_BENEATH) && (dp == ndp->ni_usedvp)) {
1404 error = ENOTCAPABLE;
1405 goto bad;
1406 }
1407 /* return ENOTCAPABLE if '..' path traversal is prohibited */
1408 if ((ndp->ni_flag & NAMEI_NODOTDOT)) {
1409 error = ENOTCAPABLE;
1410 goto bad;
1411 }
1412 /*
1413 * if this is a chroot'ed process, check if the current
1414 * directory is still a subdirectory of the process's
1415 * root directory.
1416 */
1417 if (ndp->ni_rootdir && (ndp->ni_rootdir != rootvnode) &&
1418 dp != ndp->ni_rootdir) {
1419 int sdir_error;
1420 int is_subdir = FALSE;
1421
1422 sdir_error = vnode_issubdir(dp, ndp->ni_rootdir,
1423 &is_subdir, vfs_context_kernel());
1424
1425 /*
1426 * If we couldn't determine if dp is a subdirectory of
1427 * ndp->ni_rootdir (sdir_error != 0), we let the request
1428 * proceed.
1429 */
1430 if (!sdir_error && !is_subdir) {
1431 vnode_put(dp);
1432 dp = ndp->ni_rootdir;
1433 /*
1434 * namei takes a ref on ndp->ni_rootdir
1435 */
1436 if (vnode_getwithref(dp)) {
1437 dp = NULLVP;
1438 error = ENOENT;
1439 goto bad;
1440 }
1441 }
1442 }
1443
1444 for (;;) {
1445 if (dp == ndp->ni_rootdir || dp == rootvnode) {
1446 ndp->ni_dvp = dp;
1447 ndp->ni_vp = dp;
1448 /*
1449 * we're pinned at the root
1450 * we've already got one reference on 'dp'
1451 * courtesy of cache_lookup_path... take
1452 * another one for the ".."
1453 * if we fail to get the new reference, we'll
1454 * drop our original down in 'bad'
1455 */
1456 if (vnode_get(dp)) {
1457 error = ENOENT;
1458 goto bad;
1459 }
1460 atroot = 1;
1461 goto returned_from_lookup_path;
1462 }
1463 if ((dp->v_flag & VROOT) == 0 ||
1464 (cnp->cn_flags & NOCROSSMOUNT)) {
1465 break;
1466 }
1467 if (dp->v_mount == NULL) { /* forced umount */
1468 error = EBADF;
1469 goto bad;
1470 }
1471 if ((ndp->ni_flag & NAMEI_RESOLVE_BENEATH) && (cnp->cn_flags & ISDOTDOT) && (dp->v_mount->mnt_vnodecovered == ndp->ni_usedvp)) {
1472 /* Ensure ".." doesn't escape after mount point traversal */
1473 error = ENOTCAPABLE;
1474 goto bad;
1475 }
1476 tdp = dp;
1477 dp = tdp->v_mount->mnt_vnodecovered;
1478
1479 if ((vnode_getwithref(dp))) {
1480 vnode_put(tdp);
1481 dp = NULLVP;
1482 error = ENOENT;
1483 goto bad;
1484 }
1485
1486 vnode_put(tdp);
1487
1488 ndp->ni_dvp = dp;
1489 dp_authorized = 0;
1490 }
1491 }
1492
1493 /*
1494 * We now have a segment name to search for, and a directory to search.
1495 */
1496 #if CONFIG_UNION_MOUNTS
1497 unionlookup:
1498 #endif /* CONFIG_UNION_MOUNTS */
1499 ndp->ni_vp = NULLVP;
1500
1501 if (dp->v_type != VDIR) {
1502 #if CONFIG_MACF
1503 /*
1504 * Prevent the information disclosure on the vnode
1505 */
1506 if (mac_vnode_check_stat(ctx, NOCRED, dp) == EPERM) {
1507 error = EPERM;
1508 goto lookup_error;
1509 }
1510 #endif /* CONFIG_MACF */
1511 error = ENOTDIR;
1512 goto lookup_error;
1513 }
1514 if ((cnp->cn_flags & DONOTAUTH) != DONOTAUTH) {
1515 error = lookup_authorize_search(dp, cnp, dp_authorized, ctx);
1516 if (error) {
1517 goto lookup_error;
1518 }
1519 }
1520
1521 /*
1522 * Now that we've authorized a lookup, can bail out if the filesystem
1523 * will be doing a batched operation. Return an iocount on dvp.
1524 */
1525 #if NAMEDRSRCFORK
1526 if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) {
1527 #else
1528 if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) {
1529 #endif /* NAMEDRSRCFORK */
1530 ndp->ni_flag |= NAMEI_UNFINISHED;
1531 ndp->ni_ncgeneration = dp->v_nc_generation;
1532 return 0;
1533 }
1534
1535 nc_generation = dp->v_nc_generation;
1536
1537 /*
1538 * Note:
1539 * Filesystems that support hardlinks may want to call vnode_update_identity
1540 * if the lookup operation below will modify the in-core vnode to belong to a new point
1541 * in the namespace. VFS cannot infer whether or not the look up operation makes the vnode
1542 * name change or change parents. Without this, the lookup may make update
1543 * filesystem-specific in-core metadata but fail to update the v_parent or v_name
1544 * fields in the vnode. If VFS were to do this, it would be necessary to call
1545 * vnode_update_identity on every lookup operation -- expensive!
1546 *
1547 * However, even with this in place, multiple lookups may occur in between this lookup
1548 * and the subsequent vnop, so, at best, we could only guarantee that you would get a
1549 * valid path back, and not necessarily the one that you wanted.
1550 *
1551 * Example:
1552 * /tmp/a == /foo/b
1553 *
1554 * If you are now looking up /foo/b and the vnode for this link represents /tmp/a,
1555 * vnode_update_identity will fix the parentage so that you can get /foo/b back
1556 * through the v_parent chain (preventing you from getting /tmp/b back). It would
1557 * not fix whether or not you should or should not get /tmp/a vs. /foo/b.
1558 */
1559
1560 error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx);
1561
1562 if (error) {
1563 lookup_error:
1564 #if CONFIG_UNION_MOUNTS
1565 if ((error == ENOENT) &&
1566 (dp->v_mount != NULL) &&
1567 (dp->v_mount->mnt_flag & MNT_UNION)) {
1568 tdp = dp;
1569 error = lookup_traverse_union(tdp, &dp, ctx);
1570 vnode_put(tdp);
1571 if (error) {
1572 dp = NULLVP;
1573 goto bad;
1574 }
1575
1576 ndp->ni_dvp = dp;
1577 dp_authorized = 0;
1578 goto unionlookup;
1579 }
1580 #endif /* CONFIG_UNION_MOUNTS */
1581
1582 if (error != EJUSTRETURN) {
1583 goto bad;
1584 }
1585
1586 if (ndp->ni_vp != NULLVP) {
1587 panic("leaf should be empty");
1588 }
1589
1590 #if NAMEDRSRCFORK
1591 /*
1592 * At this point, error should be EJUSTRETURN.
1593 *
1594 * If CN_WANTSRSRCFORK is set, that implies that the
1595 * underlying filesystem could not find the "parent" of the
1596 * resource fork (the data fork), and we are doing a lookup
1597 * for a CREATE event.
1598 *
1599 * However, this should be converted to an error, as the
1600 * failure to find this parent should disallow further
1601 * progress to try and acquire a resource fork vnode.
1602 */
1603 if (cnp->cn_flags & CN_WANTSRSRCFORK) {
1604 error = ENOENT;
1605 goto bad;
1606 }
1607 #endif
1608
1609 error = lookup_validate_creation_path(ndp);
1610 if (error) {
1611 goto bad;
1612 }
1613 /*
1614 * We return with ni_vp NULL to indicate that the entry
1615 * doesn't currently exist, leaving a pointer to the
1616 * referenced directory vnode in ndp->ni_dvp.
1617 */
1618 if (cnp->cn_flags & SAVESTART) {
1619 if ((vnode_get(ndp->ni_dvp))) {
1620 error = ENOENT;
1621 goto bad;
1622 }
1623 ndp->ni_startdir = ndp->ni_dvp;
1624 }
1625 if (!wantparent) {
1626 vnode_put(ndp->ni_dvp);
1627 }
1628
1629 if (kdebug_enable) {
1630 kdebug_lookup(ndp->ni_dvp, cnp);
1631 }
1632 return 0;
1633 }
1634 returned_from_lookup_path:
1635 /* We'll always have an iocount on ni_vp when this finishes. */
1636 error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx);
1637 if (error != 0) {
1638 goto bad2;
1639 }
1640
1641 if (keep_going) {
1642 dp = ndp->ni_vp;
1643
1644 /* namei() will handle symlinks */
1645 if ((dp->v_type == VLNK) &&
1646 ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
1647 return 0;
1648 }
1649
1650 /*
1651 * Otherwise, there's more path to process.
1652 * cache_lookup_path is now responsible for dropping io ref on dp
1653 * when it is called again in the dirloop. This ensures we hold
1654 * a ref on dp until we complete the next round of lookup.
1655 */
1656 last_dp = dp;
1657
1658 goto dirloop;
1659 }
1660
1661 return 0;
1662 bad2:
1663 if (ndp->ni_dvp) {
1664 vnode_put(ndp->ni_dvp);
1665 }
1666
1667 vnode_put(ndp->ni_vp);
1668 ndp->ni_vp = NULLVP;
1669
1670 if (kdebug_enable) {
1671 kdebug_lookup(dp, cnp);
1672 }
1673 return error;
1674
1675 bad:
1676 if (dp) {
1677 vnode_put(dp);
1678 }
1679 ndp->ni_vp = NULLVP;
1680
1681 if (kdebug_enable) {
1682 kdebug_lookup(dp, cnp);
1683 }
1684 return error;
1685 }
1686
1687 #if CONFIG_UNION_MOUNTS
1688 /*
1689 * Given a vnode in a union mount, traverse to the equivalent
1690 * vnode in the underlying mount.
1691 */
1692 int
1693 lookup_traverse_union(vnode_t dvp, vnode_t *new_dvp, vfs_context_t ctx)
1694 {
1695 char *path = NULL, *pp;
1696 const char *name, *np;
1697 size_t len;
1698 int error = 0;
1699 struct nameidata nd;
1700 vnode_t vp = dvp;
1701
1702 *new_dvp = NULL;
1703
1704 if (vp && vp->v_flag & VROOT) {
1705 *new_dvp = vp->v_mount->mnt_vnodecovered;
1706 if (vnode_getwithref(*new_dvp)) {
1707 return ENOENT;
1708 }
1709 return 0;
1710 }
1711
1712 path = namei_alloc(MAXPATHLEN);
1713
1714 /*
1715 * Walk back up to the mountpoint following the
1716 * v_parent chain and build a slash-separated path.
1717 * Then lookup that path starting with the covered vnode.
1718 */
1719 pp = path + (MAXPATHLEN - 1);
1720 *pp = '\0';
1721
1722 while (1) {
1723 name = vnode_getname(vp);
1724 if (name == NULL) {
1725 printf("lookup_traverse_union: null parent name: .%s\n", pp);
1726 error = ENOENT;
1727 goto done;
1728 }
1729 len = strlen(name);
1730 if ((len + 1) > (size_t)(pp - path)) { // Enough space for this name ?
1731 error = ENAMETOOLONG;
1732 vnode_putname(name);
1733 goto done;
1734 }
1735 for (np = name + len; len > 0; len--) { // Copy name backwards
1736 *--pp = *--np;
1737 }
1738 vnode_putname(name);
1739 vp = vp->v_parent;
1740 if (vp == NULLVP || vp->v_flag & VROOT) {
1741 break;
1742 }
1743 *--pp = '/';
1744 }
1745
1746 /* Evaluate the path in the underlying mount */
1747 NDINIT(&nd, LOOKUP, OP_LOOKUP, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(pp), ctx);
1748 nd.ni_dvp = dvp->v_mount->mnt_vnodecovered;
1749 error = namei(&nd);
1750 if (error == 0) {
1751 *new_dvp = nd.ni_vp;
1752 }
1753 nameidone(&nd);
1754 done:
1755 if (path) {
1756 namei_free(path, MAXPATHLEN);
1757 }
1758 return error;
1759 }
1760 #endif /* CONFIG_UNION_MOUNTS */
1761
1762 int
1763 lookup_validate_creation_path(struct nameidata *ndp)
1764 {
1765 struct componentname *cnp = &ndp->ni_cnd;
1766
1767 /*
1768 * If creating and at end of pathname, then can consider
1769 * allowing file to be created.
1770 */
1771 if (cnp->cn_flags & RDONLY) {
1772 return EROFS;
1773 }
1774 if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) {
1775 return ENOENT;
1776 }
1777
1778 return 0;
1779 }
1780
1781 /*
1782 * Modifies only ni_vp. Always returns with ni_vp still valid (iocount held).
1783 */
1784 static int
1785 lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
1786 int vbusyflags, vfs_context_t ctx)
1787 {
1788 mount_t mp;
1789 vnode_t tdp;
1790 int error = 0;
1791 uint32_t depth = 0;
1792 vnode_t mounted_on_dp;
1793 int current_mount_generation = 0;
1794 #if CONFIG_TRIGGERS
1795 vnode_t triggered_dp = NULLVP;
1796 int retry_cnt = 0;
1797 #define MAX_TRIGGER_RETRIES 1
1798 #endif
1799
1800 if (dp->v_type != VDIR || cnp->cn_flags & NOCROSSMOUNT) {
1801 return 0;
1802 }
1803
1804 mounted_on_dp = dp;
1805 #if CONFIG_TRIGGERS
1806 restart:
1807 #endif
1808 current_mount_generation = mount_generation;
1809
1810 while (dp->v_mountedhere) {
1811 vnode_lock_spin(dp);
1812 if ((mp = dp->v_mountedhere)) {
1813 mp->mnt_crossref++;
1814 vnode_unlock(dp);
1815 } else {
1816 vnode_unlock(dp);
1817 break;
1818 }
1819
1820 if ((ndp->ni_flag & NAMEI_RESOLVE_BENEATH) && (cnp->cn_flags & ISDOTDOT) && (dp == ndp->ni_usedvp)) {
1821 /* Ensure ".." doesn't escape after mount point traversal */
1822 mount_dropcrossref(mp, dp, 0);
1823 error = ENOTCAPABLE;
1824 goto out;
1825 }
1826
1827 if (ISSET(mp->mnt_lflag, MNT_LFORCE)) {
1828 mount_dropcrossref(mp, dp, 0);
1829 break; // don't traverse into a forced unmount
1830 }
1831
1832 if ((ndp->ni_flag & NAMEI_LOCAL) && !(mp->mnt_flag & MNT_LOCAL)) {
1833 /* Prevent a path lookup from ever crossing into a network filesystem */
1834 mount_dropcrossref(mp, dp, 0);
1835 error = ENOTCAPABLE;
1836 goto out;
1837 }
1838 if ((ndp->ni_flag & NAMEI_NODEVFS) && (strcmp(mp->mnt_vfsstat.f_fstypename, "devfs") == 0)) {
1839 /* Prevent a path lookup into `devfs` filesystem */
1840 mount_dropcrossref(mp, dp, 0);
1841 error = ENOTCAPABLE;
1842 goto out;
1843 }
1844 if ((ndp->ni_flag & NAMEI_IMMOVABLE) && (mp->mnt_flag & MNT_REMOVABLE) && !(mp->mnt_kern_flag & MNTK_VIRTUALDEV)) {
1845 /* Prevent a path lookup into a removable filesystem */
1846 mount_dropcrossref(mp, dp, 0);
1847 error = ENOTCAPABLE;
1848 goto out;
1849 }
1850
1851 if (vfs_busy(mp, vbusyflags)) {
1852 mount_dropcrossref(mp, dp, 0);
1853 if (vbusyflags == LK_NOWAIT) {
1854 error = ENOENT;
1855 goto out;
1856 }
1857
1858 continue;
1859 }
1860
1861 error = VFS_ROOT(mp, &tdp, ctx);
1862
1863 mount_dropcrossref(mp, dp, 0);
1864 vfs_unbusy(mp);
1865
1866 if (error) {
1867 goto out;
1868 }
1869
1870 vnode_put(dp);
1871 ndp->ni_vp = dp = tdp;
1872 if (dp->v_type != VDIR) {
1873 #if DEVELOPMENT || DEBUG
1874 panic("%s : Root of filesystem not a directory",
1875 __FUNCTION__);
1876 #else
1877 break;
1878 #endif
1879 }
1880 depth++;
1881 }
1882
1883 #if CONFIG_TRIGGERS
1884 /*
1885 * The triggered_dp check here is required but is susceptible to a
1886 * (unlikely) race in which trigger mount is done from here and is
1887 * unmounted before we get past vfs_busy above. We retry to deal with
1888 * that case but it has the side effect of unwanted retries for
1889 * "special" processes which don't want to trigger mounts.
1890 */
1891 if (dp->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) {
1892 error = vnode_trigger_resolve(dp, ndp, ctx);
1893 if (error) {
1894 goto out;
1895 }
1896 if (dp == triggered_dp) {
1897 retry_cnt += 1;
1898 } else {
1899 retry_cnt = 0;
1900 }
1901 triggered_dp = dp;
1902 goto restart;
1903 }
1904 #endif /* CONFIG_TRIGGERS */
1905
1906 if (depth) {
1907 mp = mounted_on_dp->v_mountedhere;
1908
1909 if (mp) {
1910 mount_lock_spin(mp);
1911 mp->mnt_realrootvp_vid = dp->v_id;
1912 mp->mnt_realrootvp = dp;
1913 mp->mnt_generation = current_mount_generation;
1914 mount_unlock(mp);
1915 }
1916 }
1917
1918 return 0;
1919
1920 out:
1921 return error;
1922 }
1923
1924 /*
1925 * Takes ni_vp and ni_dvp non-NULL. Returns with *new_dp set to the location
1926 * at which to start a lookup with a resolved path, and all other iocounts dropped.
1927 */
1928 static int
1929 lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, bool *new_dp_has_iocount, vfs_context_t ctx)
1930 {
1931 int error;
1932 char *cp = NULL; /* pointer into pathname argument */
1933 u_int cplen = 0;
1934 uio_t auio;
1935 UIO_STACKBUF(uio_buf, 1);
1936 int need_newpathbuf;
1937 u_int linklen = 0;
1938 struct componentname *cnp = &ndp->ni_cnd;
1939 vnode_t dp;
1940 char *tmppn;
1941 u_int rsrclen = (cnp->cn_flags & CN_WANTSRSRCFORK) ? sizeof(_PATH_RSRCFORKSPEC) : 0;
1942 bool dp_has_iocount = false;
1943
1944 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
1945 #if CONFIG_MACF
1946 if (mac_vnode_check_stat(ctx, NOCRED, ndp->ni_vp) == EPERM) {
1947 return EPERM;
1948 }
1949 #endif /* CONFIG_MACF */
1950 return ELOOP;
1951 }
1952 #if CONFIG_MACF
1953 if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0) {
1954 return error;
1955 }
1956 #endif /* MAC */
1957 if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) {
1958 need_newpathbuf = 1;
1959 } else {
1960 need_newpathbuf = 0;
1961 }
1962
1963 if (need_newpathbuf) {
1964 if (!(cnp->cn_flags & HASBUF) || cnp->cn_pnlen == MAXPATHLEN) {
1965 cplen = MAXPATHLEN;
1966 } else {
1967 assert(proc_support_long_paths(vfs_context_proc(ctx)));
1968 cplen = cnp->cn_pnlen;
1969 }
1970 cp = namei_alloc(cplen);
1971 } else {
1972 cp = cnp->cn_pnbuf;
1973 }
1974 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
1975
1976 uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);
1977
1978 error = VNOP_READLINK(ndp->ni_vp, auio, ctx);
1979
1980 if (!error) {
1981 user_ssize_t resid = uio_resid(auio);
1982
1983 assert(resid <= MAXPATHLEN);
1984
1985 if (resid == MAXPATHLEN) {
1986 linklen = 0;
1987 } else {
1988 /*
1989 * Safe to set unsigned with a [larger] signed type here
1990 * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN
1991 * is only 1024.
1992 */
1993 linklen = (u_int)strnlen(cp, MAXPATHLEN - (u_int)resid);
1994 }
1995
1996 size_t maxlen = proc_support_long_paths(vfs_context_proc(ctx)) ? MAXLONGPATHLEN : MAXPATHLEN;
1997
1998 if (linklen == 0) {
1999 error = ENOENT;
2000 } else if (linklen + ndp->ni_pathlen + rsrclen > maxlen) {
2001 error = ENAMETOOLONG;
2002 }
2003 }
2004
2005 if (error) {
2006 if (need_newpathbuf) {
2007 namei_free(cp, cplen);
2008 }
2009 return error;
2010 }
2011
2012 if (need_newpathbuf) {
2013 tmppn = cnp->cn_pnbuf;
2014 u_int tmplen = cnp->cn_pnlen;
2015 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
2016 cnp->cn_pnbuf = cp;
2017 cnp->cn_pnlen = cplen;
2018
2019 if ((cnp->cn_flags & HASBUF)) {
2020 namei_free(tmppn, tmplen);
2021 } else {
2022 cnp->cn_flags |= HASBUF;
2023 }
2024 } else {
2025 cnp->cn_pnbuf[linklen] = '\0';
2026 }
2027
2028 ndp->ni_pathlen += linklen;
2029 cnp->cn_nameptr = cnp->cn_pnbuf;
2030
2031 /*
2032 * starting point for 'relative'
2033 * symbolic link path
2034 */
2035 dp = ndp->ni_dvp;
2036
2037 /*
2038 * get rid of reference returned via 'lookup'
2039 * ni_dvp is released only if we restart at /.
2040 */
2041 vnode_put(ndp->ni_vp);
2042 ndp->ni_vp = NULLVP;
2043 ndp->ni_dvp = NULLVP;
2044
2045 dp_has_iocount = true;
2046
2047 /*
2048 * Check if symbolic link restarts us at the root
2049 */
2050 if (*(cnp->cn_nameptr) == '/') {
2051 /* return ENOTCAPABLE if resolve beneath and the symlink restarts at root */
2052 if (ndp->ni_flag & NAMEI_RESOLVE_BENEATH) {
2053 vnode_put(dp); /* ALWAYS have a dvp for a symlink */
2054 return ENOTCAPABLE;
2055 }
2056 while (*(cnp->cn_nameptr) == '/') {
2057 cnp->cn_nameptr++;
2058 ndp->ni_pathlen--;
2059 }
2060 if (linklen != 0) {
2061 vnode_put(dp); /* ALWAYS have a dvp for a symlink */
2062 dp_has_iocount = false;
2063 if ((dp = ndp->ni_rootdir) == NULLVP) {
2064 return ENOENT;
2065 }
2066 }
2067 }
2068
2069 *new_dp = dp;
2070 *new_dp_has_iocount = dp_has_iocount;
2071
2072 return 0;
2073 }
2074
2075 /*
2076 * relookup - lookup a path name component
2077 * Used by lookup to re-aquire things.
2078 */
2079 int
2080 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
2081 {
2082 struct vnode *dp = NULL; /* the directory we are searching */
2083 int wantparent; /* 1 => wantparent or lockparent flag */
2084 int rdonly; /* lookup read-only flag bit */
2085 int error = 0;
2086 #ifdef NAMEI_DIAGNOSTIC
2087 int i, newhash; /* DEBUG: check name hash */
2088 char *cp; /* DEBUG: check name ptr/len */
2089 #endif
2090 vfs_context_t ctx = cnp->cn_context;
2091
2092 /*
2093 * Setup: break out flag bits into variables.
2094 */
2095 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
2096 rdonly = cnp->cn_flags & RDONLY;
2097 cnp->cn_flags &= ~ISSYMLINK;
2098
2099 if (cnp->cn_flags & NOCACHE) {
2100 cnp->cn_flags &= ~MAKEENTRY;
2101 } else {
2102 cnp->cn_flags |= MAKEENTRY;
2103 }
2104
2105 dp = dvp;
2106
2107 /*
2108 * Check for degenerate name (e.g. / or "")
2109 * which is a way of talking about a directory,
2110 * e.g. like "/." or ".".
2111 */
2112 if (cnp->cn_nameptr[0] == '\0') {
2113 if (cnp->cn_nameiop != LOOKUP || wantparent) {
2114 error = EISDIR;
2115 goto bad;
2116 }
2117 if (dp->v_type != VDIR) {
2118 error = ENOTDIR;
2119 goto bad;
2120 }
2121 if ((vnode_get(dp))) {
2122 error = ENOENT;
2123 goto bad;
2124 }
2125 *vpp = dp;
2126
2127 if (cnp->cn_flags & SAVESTART) {
2128 panic("lookup: SAVESTART");
2129 }
2130 return 0;
2131 }
2132 /*
2133 * We now have a segment name to search for, and a directory to search.
2134 */
2135 if ((error = VNOP_LOOKUP(dp, vpp, cnp, ctx))) {
2136 if (error != EJUSTRETURN) {
2137 goto bad;
2138 }
2139 #if DIAGNOSTIC
2140 if (*vpp != NULL) {
2141 panic("leaf should be empty");
2142 }
2143 #endif
2144 /*
2145 * If creating and at end of pathname, then can consider
2146 * allowing file to be created.
2147 */
2148 if (rdonly) {
2149 error = EROFS;
2150 goto bad;
2151 }
2152 /*
2153 * We return with ni_vp NULL to indicate that the entry
2154 * doesn't currently exist, leaving a pointer to the
2155 * (possibly locked) directory inode in ndp->ni_dvp.
2156 */
2157 return 0;
2158 }
2159 dp = *vpp;
2160
2161 #if DIAGNOSTIC
2162 /*
2163 * Check for symbolic link
2164 */
2165 if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW)) {
2166 panic("relookup: symlink found.");
2167 }
2168 #endif
2169
2170 /*
2171 * Disallow directory write attempts on read-only file systems.
2172 */
2173 if (rdonly &&
2174 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
2175 error = EROFS;
2176 goto bad2;
2177 }
2178 /* ASSERT(dvp == ndp->ni_startdir) */
2179
2180 return 0;
2181
2182 bad2:
2183 vnode_put(dp);
2184 bad:
2185 *vpp = NULL;
2186
2187 return error;
2188 }
2189
2190 /*
2191 * Free pathname buffer
2192 */
2193 void
2194 nameidone(struct nameidata *ndp)
2195 {
2196 if (ndp->ni_cnd.cn_flags & HASBUF) {
2197 char *tmp = ndp->ni_cnd.cn_pnbuf;
2198
2199 ndp->ni_cnd.cn_pnbuf = NULL;
2200 ndp->ni_cnd.cn_flags &= ~HASBUF;
2201 namei_free(tmp, ndp->ni_cnd.cn_pnlen);
2202 }
2203 }
2204
2205
2206 /*
2207 * Log (part of) a pathname using kdebug, as used by fs_usage. The path up to
2208 * and including the current component name are logged. Up to NUMPARMS * 4
2209 * bytes of pathname will be logged. If the path to be logged is longer than
2210 * that, then the last NUMPARMS * 4 bytes are logged. That is, the truncation
2211 * removes the leading portion of the path.
2212 *
2213 * The logging is done via multiple KDBG_RELEASE calls. The first one is marked
2214 * with DBG_FUNC_START. The last one is marked with DBG_FUNC_END (in addition
2215 * to DBG_FUNC_START if it is also the first). There may be intermediate ones
2216 * with neither DBG_FUNC_START nor DBG_FUNC_END.
2217 *
 * The first event passes the vnode pointer and 24 or 32 (on K32, 12 or 16)
 * bytes of pathname. The remaining events add 32 (on K32, 16) bytes of
 * pathname each. The minimum number of events required to pass the path is
 * used. Any excess padding in the final event (because not all of the 24 or 32
 * (on K32, 12 or 16) bytes are needed for the remainder of the path) is set to
 * zero bytes, or '>' if there is more path beyond the current component name
 * (usually because an intermediate component was not found).
2225 *
2226 * NOTE: If the path length is greater than NUMPARMS * 4, or is not of the form
2227 * 24 + N * 32 (or on K32, 12 + N * 16), there will be no padding.
2228 */
2229 #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
2230
2231 void
2232 kdebug_vfs_lookup(const char *path, size_t path_len, void *vnp,
2233 uint32_t flags)
2234 {
2235 unsigned long path_words[4] = {};
2236 size_t trace_len = MIN(sizeof(path_words) - sizeof(path_words[0]), path_len);
2237 size_t path_next = 0;
2238 bool noprocfilt = flags & KDBG_VFS_LOOKUP_FLAG_NOPROCFILT;
2239
2240 assert(path_len >= 0);
2241
2242 int code = ((flags & KDBG_VFS_LOOKUP_FLAG_LOOKUP) ? VFS_LOOKUP :
2243 VFS_LOOKUP_DONE) | DBG_FUNC_START;
2244
2245 if (path_len <= (3 * (int)sizeof(long))) {
2246 code |= DBG_FUNC_END;
2247 }
2248 memcpy(path_words, path, trace_len);
2249 path_next += trace_len;
2250
2251 if (noprocfilt) {
2252 KDBG_RELEASE_NOPROCFILT(code, kdebug_vnode(vnp), path_words[0],
2253 path_words[1], path_words[2]);
2254 } else {
2255 KDBG_RELEASE(code, kdebug_vnode(vnp), path_words[0], path_words[1],
2256 path_words[2]);
2257 }
2258
2259 code &= ~DBG_FUNC_START;
2260
2261 for (int i = 3; i * (int)sizeof(long) < path_len; i += 4) {
2262 trace_len = sizeof(path_words);
2263 if ((i + 4) * (int)sizeof(long) >= path_len) {
2264 code |= DBG_FUNC_END;
2265 trace_len = path_len - path_next;
2266 memset(path_words, 0, sizeof(path_words));
2267 }
2268 memcpy(path_words, &path[path_next], trace_len);
2269 path_next += trace_len;
2270
2271 if (noprocfilt) {
2272 KDBG_RELEASE_NOPROCFILT(code, path_words[0], path_words[1],
2273 path_words[2], path_words[3]);
2274 } else {
2275 KDBG_RELEASE(code, path_words[0], path_words[1],
2276 path_words[2], path_words[3]);
2277 }
2278 }
2279 }
2280
2281 void
2282 kdebug_lookup_gen_events(long *path_words, int path_len, void *vnp, bool lookup)
2283 {
2284 assert(path_len >= 0);
2285 kdebug_vfs_lookup((const char *)path_words, path_len, vnp,
2286 lookup ? KDBG_VFS_LOOKUP_FLAG_LOOKUP : 0);
2287 }
2288
2289 void
2290 kdebug_lookup(vnode_t vnp, struct componentname *cnp)
2291 {
2292 kdebug_vfs_lookup(cnp->cn_pnbuf, strnlen(cnp->cn_pnbuf, cnp->cn_pnlen), vnp, KDBG_VFS_LOOKUP_FLAG_LOOKUP);
2293 }
2294
2295 #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
2296
/*
 * No-op variant of kdebug_vfs_lookup(), compiled when KDEBUG_LEVEL is below
 * KDEBUG_LEVEL_IST.  Every argument is ignored and nothing is logged.
 */
void
kdebug_vfs_lookup(const char *dbg_parms __unused, size_t dbg_namelen __unused,
    void *dp __unused, __unused kdebug_vfs_lookup_flags_t flags)
{
}
2302
/*
 * No-op variant of kdebug_lookup(), compiled when KDEBUG_LEVEL is below
 * KDEBUG_LEVEL_IST.  Both arguments are ignored.
 */
static void
kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused)
{
}
2307 #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
2308
2309 int
2310 vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx)
2311 {
2312 mount_t mp;
2313 int error;
2314
2315 mp = mount_lookupby_volfsid(fsid->val[0], 1);
2316 if (mp == NULL) {
2317 return EINVAL;
2318 }
2319
2320 /* Get the target vnode. */
2321 if (ino == 2) {
2322 error = VFS_ROOT(mp, vpp, ctx);
2323 } else {
2324 error = VFS_VGET(mp, ino, vpp, ctx);
2325 }
2326
2327 vfs_unbusy(mp);
2328 return error;
2329 }
/*
 * Obtain the real path from a legacy volfs style path.
 *
 * Valid formats of input path:
 *
 *	"555/@"
 *	"555/2"
 *	"555/123456"
 *	"555/123456/foobar"
 *
 * Where:
 *	555 represents the volfs file system id
 *	'@' and '2' are aliases to the root of a file system
 *	123456 represents a file id
 *	"foobar" represents a file name
 *
 * Parameters:
 *	path      volfs style path, starting at the file system id
 *	realpath  output buffer that receives the NUL-terminated real path
 *	bufsize   size of realpath in bytes
 *	ctx       context passed to VFS_ROOT/VFS_VGET, vnode_issubdir and
 *	          build_path
 *	rdvp      when not NULLVP, the chroot directory the target must be
 *	          under; when NULLVP no containment check is made
 *
 * Returns 0 on success, or:
 *	EINVAL        malformed path, no mount for the file system id, the
 *	              target has no parent, or it is not under rdvp
 *	ENOENT        file system id larger than INT_MAX
 *	ENAMETOOLONG  appending the trailing components would not fit in
 *	              min(bufsize, MAXPATHLEN) bytes including the NUL
 *	otherwise     the error from VFS_ROOT/VFS_VGET, vnode_issubdir or
 *	              build_path
 */
#if CONFIG_VOLFS
static int
vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx, vnode_t rdvp)
{
	vnode_t vp;
	struct mount *mp = NULL;
	char *str;
	unsigned long id;
	ino64_t ino;
	int error;
	int length;

	/* Get file system id and move str to next component. */
	id = strtoul(path, &str, 10);
	if (id == 0 || str[0] != '/') {
		return EINVAL;
	}
	while (*str == '/') {
		str++;
	}

	if (id > INT_MAX) {
		return ENOENT;
	}
	mp = mount_lookupby_volfsid((int)id, 1);
	if (mp == NULL) {
		return EINVAL;  /* unexpected failure */
	}

	/* Check for an alias to a file system root. */
	if (str[0] == '@' && str[1] == '\0') {
		ino = 2;
		str++;
	} else {
		/* Get file id and move str to next component. */
		ino = strtouq(str, &str, 10);
	}

	/* Get the target vnode. */
	if (ino == 2) {
		struct vfs_attr vfsattr;
		int use_vfs_root = TRUE;

		/*
		 * File systems that report VOL_CAP_FMT_VOL_GROUPS as both
		 * valid and set resolve id 2 through VFS_VGET instead of
		 * VFS_ROOT.
		 */
		VFSATTR_INIT(&vfsattr);
		VFSATTR_WANTED(&vfsattr, f_capabilities);
		if (vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
		    VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
				use_vfs_root = FALSE;
			}
		}

		if (use_vfs_root) {
			error = VFS_ROOT(mp, &vp, ctx);
		} else {
			error = VFS_VGET(mp, ino, &vp, ctx);
		}
	} else {
		error = VFS_VGET(mp, ino, &vp, ctx);
	}
	vfs_unbusy(mp);
	if (error) {
		return error;
	}
	realpath[0] = '\0';

	/* Check for and fail if the path is not under the chroot */
	if (rdvp != NULLVP) {
		int is_subdir = 0;
		vnode_t pvp = NULLVP;

		/* Get the parent if vp is not a directory */
		if (!vnode_isdir(vp) && !(pvp = vnode_getparent(vp))) {
			vnode_put(vp);
			return EINVAL;
		}

		/* Check if a given directory vp/pvp is a subdirectory of rdvp */
		error = vnode_issubdir(pvp ? pvp : vp, rdvp, &is_subdir, ctx);
		if (pvp) {
			vnode_put(pvp);
		}
		if (error || !is_subdir) {
			if (!error) {
				/* Path is not under the chroot */
				error = EINVAL;
			}
			vnode_put(vp);
			return error;
		}
	}

	/* Get the absolute path to this vnode. */
	error = build_path(vp, realpath, (int)bufsize, &length, 0, ctx);
	vnode_put(vp);

	/* Append whatever follows the file id (e.g. "/foobar"). */
	if (error == 0 && *str != '\0') {
		/*
		 * Never write past the caller's buffer, and keep the
		 * historical MAXPATHLEN cap on the result.
		 */
		size_t limit = (bufsize < MAXPATHLEN) ? bufsize : MAXPATHLEN;

		/*
		 * strlcat() returns the length of the string it tried to
		 * build; a value >= the size it was given means the result
		 * was truncated.
		 */
		if (strlcat(realpath, str, limit) >= limit) {
			error = ENAMETOOLONG;
		}
	}

	return error;
}
#endif
2455
2456 void
2457 lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create)
2458 {
2459 if (error == 0 && vp == NULLVP) {
2460 panic("NULL vp with error == 0.");
2461 }
2462
2463 /*
2464 * We don't want to do any of this if we didn't use the compound vnop
2465 * to perform the lookup... i.e. if we're allowing and using the legacy pattern,
2466 * where we did a full lookup.
2467 */
2468 if ((ndp->ni_flag & NAMEI_COMPOUND_OP_MASK) == 0) {
2469 return;
2470 }
2471
2472 /*
2473 * If we're going to continue the lookup, we'll handle
2474 * all lookup-related updates at that time.
2475 */
2476 if (error == EKEEPLOOKING) {
2477 return;
2478 }
2479
2480 /*
2481 * Only audit or update cache for *found* vnodes. For creation
2482 * neither would happen in the non-compound-vnop case.
2483 */
2484 if ((vp != NULLVP) && !did_create) {
2485 /*
2486 * If MAKEENTRY isn't set, and we've done a successful compound VNOP,
2487 * then we certainly don't want to update cache or identity.
2488 */
2489 if ((error != 0) || (ndp->ni_cnd.cn_flags & MAKEENTRY)) {
2490 lookup_consider_update_cache(dvp, vp, &ndp->ni_cnd, ndp->ni_ncgeneration);
2491 }
2492 if (ndp->ni_cnd.cn_flags & AUDITVNPATH1) {
2493 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2494 } else if (ndp->ni_cnd.cn_flags & AUDITVNPATH2) {
2495 AUDIT_ARG(vnpath, vp, ARG_VNODE2);
2496 }
2497 }
2498
2499 /*
2500 * If you created (whether you opened or not), cut a lookup tracepoint
2501 * for the parent dir (as would happen without a compound vnop). Note: we may need
2502 * a vnode despite failure in this case!
2503 *
2504 * If you did not create:
2505 * Found child (succeeded or not): cut a tracepoint for the child.
2506 * Did not find child: cut a tracepoint with the parent.
2507 */
2508 if (kdebug_enable) {
2509 kdebug_lookup(vp ? vp : dvp, &ndp->ni_cnd);
2510 }
2511 }
2512