xref: /xnu-8792.61.2/bsd/nfs/nfs_syscalls.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1989, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Rick Macklem at The University of Guelph.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
65  * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66  */
67 
68 #include <nfs/nfs_conf.h>
69 
70 /*
71  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72  * support for mandatory and extensible security protections.  This notice
73  * is included in support of clause 2.2 (b) of the Apple Public License,
74  * Version 2.0.
75  */
76 
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/uio_internal.h>
80 #include <sys/sysctl.h>
81 #include <sys/socketvar.h>
82 #include <sys/sysproto.h>
83 #include <sys/fsevents.h>
84 #include <kern/task.h>
85 
86 #include <security/audit/audit.h>
87 
88 #include <netinet/in.h>
89 #include <netinet/tcp.h>
90 #include <nfs/xdr_subs.h>
91 #include <nfs/rpcv2.h>
92 #include <nfs/nfsproto.h>
93 #include <nfs/nfs.h>
94 #include <nfs/nfsm_subs.h>
95 #include <nfs/nfsrvcache.h>
96 #include <nfs/nfs_gss.h>
97 #if CONFIG_MACF
98 #include <security/mac_framework.h>
99 #endif
100 
101 #if CONFIG_NFS_SERVER
102 
103 extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
104 
105 extern int nfsrv_wg_delay;
106 extern int nfsrv_wg_delay_v3;
107 
108 static int nfsrv_require_resv_port = 0;
109 static time_t  nfsrv_idlesock_timer_on = 0;
110 static int nfsrv_sock_tcp_cnt = 0;
111 #define NFSD_MIN_IDLE_TIMEOUT 30
112 static int nfsrv_sock_idle_timeout = 3600; /* One hour */
113 
114 int     nfssvc_export(user_addr_t argp);
115 int     nfssvc_exportstats(proc_t p, user_addr_t argp);
116 int     nfssvc_userstats(proc_t p, user_addr_t argp);
117 int     nfssvc_usercount(proc_t p, user_addr_t argp);
118 int     nfssvc_zerostats(void);
119 int     nfssvc_srvstats(proc_t p, user_addr_t argp);
120 int     nfssvc_nfsd(void);
121 int     nfssvc_addsock(socket_t, mbuf_t);
122 void    nfsrv_zapsock(struct nfsrv_sock *);
123 void    nfsrv_slpderef(struct nfsrv_sock *);
124 void    nfsrv_slpfree(struct nfsrv_sock *);
125 
126 #endif /* CONFIG_NFS_SERVER */
127 
128 /*
129  * sysctl stuff
130  */
131 SYSCTL_DECL(_vfs_generic);
132 SYSCTL_EXTENSIBLE_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
133 
134 #if CONFIG_NFS_SERVER
135 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
136 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
137 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
138 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
139 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
140 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
141 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
142 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
143 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
144 SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
145 SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_debug_ctl, 0, "");
146 #if CONFIG_FSE
147 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
148 #endif
149 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
150 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
151 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
152 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
153 #ifdef NFS_UC_Q_DEBUG
154 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
155 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
156 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
157 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
158 #endif
159 #endif /* CONFIG_NFS_SERVER */
160 
161 /* NFS hooks */
162 
163 /* NFS hooks variables */
164 struct nfs_hooks_in nfsh = {
165 	.f_vinvalbuf      = NULL,
166 	.f_buf_page_inval = NULL
167 };
168 
169 /* NFS hooks registration functions */
170 void
nfs_register_hooks(struct nfs_hooks_in * inh,struct nfs_hooks_out * outh)171 nfs_register_hooks(struct nfs_hooks_in *inh, struct nfs_hooks_out *outh)
172 {
173 	if (inh) {
174 		nfsh.f_vinvalbuf = inh->f_vinvalbuf;
175 		nfsh.f_buf_page_inval = inh->f_buf_page_inval;
176 	}
177 
178 	if (outh) {
179 		outh->f_get_bsdthreadtask_info = get_bsdthreadtask_info;
180 	}
181 }
182 
183 void
nfs_unregister_hooks(void)184 nfs_unregister_hooks(void)
185 {
186 	memset(&nfsh, 0, sizeof(nfsh));
187 }
188 
189 /* NFS hooks wrappers */
190 int
nfs_vinvalbuf(vnode_t vp,int flags,vfs_context_t ctx,int intrflg)191 nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
192 {
193 	if (nfsh.f_vinvalbuf == NULL) {
194 		return 0;
195 	}
196 
197 	return nfsh.f_vinvalbuf(vp, flags, ctx, intrflg);
198 }
199 
200 int
nfs_buf_page_inval(vnode_t vp,off_t offset)201 nfs_buf_page_inval(vnode_t vp, off_t offset)
202 {
203 	if (nfsh.f_buf_page_inval == NULL) {
204 		return 0;
205 	}
206 
207 	return nfsh.f_buf_page_inval(vp, offset);
208 }
209 
210 #if !CONFIG_NFS_SERVER
211 #define __no_nfs_server_unused      __unused
212 #else
213 #define __no_nfs_server_unused      /* nothing */
214 #endif
215 
216 /*
217  * NFS server system calls
218  * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
219  */
220 
221 #if CONFIG_NFS_SERVER
222 static struct nfs_exportfs *
nfsrv_find_exportfs(const char * ptr)223 nfsrv_find_exportfs(const char *ptr)
224 {
225 	struct nfs_exportfs *nxfs;
226 
227 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
228 		if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
229 			break;
230 		}
231 	}
232 	if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
233 		nxfs = NULL;
234 	}
235 
236 	return nxfs;
237 }
238 
239 #define DATA_VOLUME_MP "/System/Volumes/Data" // PLATFORM_DATA_VOLUME_MOUNT_POINT
240 
241 /*
242  * Get file handle system call
243  */
/*
 * Get file handle system call
 *
 * Translates a user-supplied pathname into an NFS file handle for a file
 * on an exported file system and copies the handle back out to uap->fhp.
 * The caller's handle buffer length selects a v2- or v3-sized handle
 * (v3 by default).  Superuser only.
 */
int
getfh(
	proc_t p __no_nfs_server_unused,
	struct getfh_args *uap __no_nfs_server_unused,
	__unused int *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	int error, fhlen = 0, fidlen;
	struct nameidata nd;
	char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
	size_t datavol_len = strlen(DATA_VOLUME_MP);
	size_t pathlen;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;

	/*
	 * Must be super user
	 */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	/* Copy in the target path and the caller's requested handle length. */
	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
	if (!error) {
		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
	}
	if (error) {
		return error;
	}
	/* limit fh size to length specified (or v3 size by default) */
	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
		fhlen = NFSV3_MAX_FH_SIZE;
	}
	/* Room left for the file ID after the export handle header. */
	fidlen = fhlen - sizeof(struct nfs_exphandle);

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	/* Look up the path; on success we hold an iocount on nd.ni_vp. */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
	error = namei(&nd);
	if (error) {
		return error;
	}
	nameidone(&nd);

	vp = nd.ni_vp;

	// find exportfs that matches f_mntonname
	/* Hold the export lock (shared) across the export table walk below. */
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	ptr = vfs_statfs(vnode_mount(vp))->f_mntonname;
	if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
		/*
		 * The f_mntonname might be a firmlink path.  Resolve
		 * it into a physical path and try again.
		 */
		size_t pathbuflen = MAXPATHLEN;
		vnode_t rvp;

		error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
		if (error) {
			goto out;
		}
		error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
		    VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
		vnode_put(rvp);
		if (error) {
			goto out;
		}
		ptr = real_mntonname;
		nxfs = nfsrv_find_exportfs(ptr);
	}
	if (nxfs == NULL) {
		error = EINVAL;
		goto out;
	}
	// find export that best matches remainder of path
	/*
	 * Strip the exported file system's mount prefix from the path,
	 * allowing for the data-volume firmlink prefix in front of it.
	 */
	if (!strncmp(path, nxfs->nxfs_path, strlen(nxfs->nxfs_path))) {
		ptr = path + strlen(nxfs->nxfs_path);
	} else if (!strncmp(path, DATA_VOLUME_MP, datavol_len) && !strncmp(path + datavol_len, nxfs->nxfs_path, strlen(nxfs->nxfs_path))) {
		ptr = path + datavol_len + strlen(nxfs->nxfs_path);
	} else {
		error = EINVAL;
		goto out;
	}

	/* Skip the separating slashes before the export-relative path. */
	while (*ptr && (*ptr == '/')) {
		ptr++;
	}
	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
		size_t len = strlen(nx->nx_path);
		if (len == 0) { // we've hit the export entry for the root directory
			break;
		}
		if (!strncmp(nx->nx_path, ptr, len)) {
			break;
		}
	}
	if (!nx) {
		error = EINVAL;
		goto out;
	}

	/* Build the handle: export header first, then the file system's file ID. */
	bzero(&nfh, sizeof(nfh));
	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
	nfh.nfh_xh.nxh_flags = 0;
	nfh.nfh_xh.nxh_reserved = 0;
	nfh.nfh_len = fidlen;
	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
	if (nfh.nfh_len > (uint32_t)fidlen) {
		/* The file ID didn't fit in the requested handle size. */
		error = EOVERFLOW;
	}
	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
	nfh.nfh_len += sizeof(nfh.nfh_xh);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

out:
	/* Common exit: release the export lock and the namei iocount. */
	lck_rw_done(&nfsrv_export_rwlock);
	vnode_put(vp);
	if (error) {
		return error;
	}
	/*
	 * At first blush, this may appear to leak a kernel stack
	 * address, but the copyout() never reaches &nfh.nfh_fhp
	 * (sizeof(fhandle_t) < sizeof(nfh)).
	 */
	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
	return error;
}
379 
380 extern const struct fileops vnops;
381 
382 /*
383  * syscall for the rpc.lockd to use to translate a NFS file handle into
384  * an open descriptor.
385  *
386  * warning: do not remove the suser() call or this becomes one giant
387  * security hole.
388  */
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * Copies in an NFS file handle, resolves it to a vnode via the export
 * tables, performs the usual vn_open-style checks/authorization, and
 * returns an open file descriptor in *retval.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 */
int
fhopen(proc_t p __no_nfs_server_unused,
    struct fhopen_args *uap __no_nfs_server_unused,
    int32_t *retval __no_nfs_server_unused)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct flock lf;
	struct fileproc *fp, *nfp;
	int fmode, error, type;
	int indx;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	/*
	 * Must be super user
	 */
	error = suser(vfs_context_ucred(ctx), 0);
	if (error) {
		return error;
	}

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
		return EINVAL;
	}

	/* Copy in the handle length first so we know how much to fetch. */
	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
	if (error) {
		return error;
	}
	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
		return EINVAL;
	}
	/*
	 * NOTE(review): this copyin re-reads nfh_len from user memory, so a
	 * racing thread could change it after the range check above (the copy
	 * size itself is still bounded by the checked value); presumably
	 * nfsrv_fhtovp() re-validates the embedded lengths — TODO confirm.
	 */
	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
	if (error) {
		return error;
	}
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

	lck_rw_lock_shared(&nfsrv_export_rwlock);
	/* now give me my vnode, it gets returned to me with a reference */
	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
	lck_rw_done(&nfsrv_export_rwlock);
	if (error) {
		if (error == NFSERR_TRYLATER) {
			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
		}
		return error;
	}

	/*
	 * From now on we have to make sure not
	 * to forget about the vnode.
	 * Any error that causes an abort must vnode_put(vp).
	 * Just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vnode_vtype(vp) == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/* disallow write operations on directories */
	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
		error = EISDIR;
		goto bad;
	}

#if CONFIG_MACF
	if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
		goto bad;
	}
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		action |= KAUTH_VNODE_WRITE_DATA;
	}
	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
		goto bad;
	}

	if ((error = VNOP_OPEN(vp, fmode, ctx))) {
		goto bad;
	}
	/* Take a usecount ref so the vnode stays open-able for the descriptor. */
	if ((error = vnode_ref_ext(vp, fmode, 0))) {
		goto bad;
	}

	/*
	 * end of vn_open code
	 */

	// starting here... error paths should call vn_close/vnode_put
	if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
		vn_close(vp, fmode & FMASK, ctx);
		goto bad;
	}
	fp = nfp;

	/* Wire the new file descriptor up to the vnode. */
	fp->fp_glob->fg_flag = fmode & FMASK;
	fp->fp_glob->fg_ops = &vnops;
	fp_set_data(fp, vp);

	// XXX do we really need to support this with fhopen()?
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file advisory lock, exclusive or shared per open flags. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK) {
			lf.l_type = F_WRLCK;
		} else {
			lf.l_type = F_RDLCK;
		}
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0) {
			type |= F_WAIT;
		}
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
			struct vfs_context context = *vfs_context_current();
			/* Modify local copy (to not damage thread copy) */
			context.vc_ucred = fp->fp_glob->fg_cred;

			/* Locking failed: unwind the open and the descriptor. */
			vn_close(vp, fp->fp_glob->fg_flag, &context);
			fp_free(p, indx, fp);
			goto bad;
		}
		fp->fp_glob->fg_flag |= FWASLOCKED;
	}

	/* Drop the iocount; the descriptor holds its own reference now. */
	vnode_put(vp);

	/* Publish the descriptor in the process's fd table. */
	proc_fdlock(p);
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;
	return 0;

bad:
	vnode_put(vp);
	return error;
}
549 
550 /*
551  * NFS server pseudo system call
552  */
/*
 * NFS server pseudo system call
 *
 * Multiplexes the server control operations selected by uap->flag:
 * adding a server socket (NFSSVC_ADDSOCK), turning the calling thread
 * into an nfsd service thread (NFSSVC_NFSD), export management, and
 * the various statistics operations.  EINTR/ERESTART from a service
 * loop are mapped to success (a killed nfsd thread is a normal exit).
 */
int
nfssvc(proc_t p __no_nfs_server_unused,
    struct nfssvc_args *uap __no_nfs_server_unused,
    __unused int *retval)
{
	mbuf_t nam;
	struct user_nfsd_args user_nfsdarg;
	socket_t so;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/*
	 * Must be super user for NFSSVC_NFSD and NFSSVC_ADDSOCK operations.
	 */
	if (((uap->flag == NFSSVC_NFSD) || (uap->flag == NFSSVC_ADDSOCK)) && ((error = proc_suser(p)))) {
		return error;
	}
#if CONFIG_MACF
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error) {
		return error;
	}
#endif

	/* make sure NFS server data structures have been initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		/* Copy in the nfsd args, converting from the 32-bit layout if needed. */
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
		} else {
			struct nfsd_args    tmp_args;
			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
			if (error == 0) {
				user_nfsdarg.sock = tmp_args.sock;
				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
				user_nfsdarg.namelen = tmp_args.namelen;
			}
		}
		if (error) {
			return error;
		}
		/* get the socket */
		error = file_socket(user_nfsdarg.sock, &so);
		if (error) {
			return error;
		}
		/* Get the client address for connected sockets. */
		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
			nam = NULL;
		} else {
			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
			if (error) {
				/* drop the iocount file_socket() grabbed on the file descriptor */
				file_drop(user_nfsdarg.sock);
				return error;
			}
		}
		/*
		 * nfssvc_addsock() will grab a retain count on the socket
		 * to keep the socket from being closed when nfsd closes its
		 * file descriptor for it.
		 */
		error = nfssvc_addsock(so, nam);
		/* drop the iocount file_socket() grabbed on the file descriptor */
		file_drop(user_nfsdarg.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		/* Become an nfsd service thread; blocks until the thread exits. */
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else if (uap->flag & NFSSVC_EXPORTSTATS) {
		error = nfssvc_exportstats(p, uap->argp);
	} else if (uap->flag & NFSSVC_USERSTATS) {
		error = nfssvc_userstats(p, uap->argp);
	} else if (uap->flag & NFSSVC_USERCOUNT) {
		error = nfssvc_usercount(p, uap->argp);
	} else if (uap->flag & NFSSVC_ZEROSTATS) {
		error = nfssvc_zerostats();
	} else if (uap->flag & NFSSVC_SRVSTATS) {
		error = nfssvc_srvstats(p, uap->argp);
	} else {
		error = EINVAL;
	}
	/* A signalled/restarted service loop is a normal shutdown, not an error. */
	if (error == EINTR || error == ERESTART) {
		error = 0;
	}
	return error;
}
642 
643 /*
644  * Adds a socket to the list for servicing by nfsds.
645  */
/*
 * Adds a socket to the list for servicing by nfsds.
 *
 * Configures socket options (keepalive, buffer sizes, timeouts),
 * allocates an nfsrv_sock wrapper, links it onto nfsrv_socklist,
 * manages the TCP idle-socket reaper timer, and wakes an nfsd to
 * start servicing it.  Consumes "mynam" (freed on every error path;
 * otherwise ownership passes to the nfsrv_sock).
 */
int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
	struct nfsrv_sock *slp;
	int error = 0, sodomain, sotype, soprotocol, on = 1;
	int first, sobufsize;
	struct timeval timeo;
	uint32_t sbmaxsize;

	/* make sure mbuf constants are set up */
	if (!nfs_mbuf_mhlen) {
		nfs_mbuf_init();
	}

	sock_gettype(so, &sodomain, &sotype, &soprotocol);

	/* There should be only one UDP socket for each of IPv4 and IPv6 */
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
		mbuf_freem(mynam);
		return EEXIST;
	}
	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
		mbuf_freem(mynam);
		return EEXIST;
	}

	/* Set protocol options and reserve some space (for UDP). */
	if (sotype == SOCK_STREAM) {
		/* Reject TCP connections from addresses no export allows. */
		error = nfsrv_check_exports_allow_address(mynam);
		if (error) {
			log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
			mbuf_freem(mynam);
			return error;
		}
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	}
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	/* Calculate maximum supported socket buffers sizes */
	sbmaxsize = sb_max * MCLBYTES / (MSIZE + MCLBYTES);

	/* Set socket buffer sizes for UDP/TCP */
	sobufsize = min(sbmaxsize, (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : NFSRV_TCPSOCKBUF);
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sobufsize, sizeof(sobufsize));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &sobufsize, sizeof(sobufsize));

	/* Buffer sizing failures are non-fatal; log and continue. */
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket buffer setting error(s) %d\n", error);
		error = 0;
	}
	sock_nointerrupt(so, 0);

	/*
	 * Set socket send/receive timeouts.
	 * Receive timeout shouldn't matter, but setting the send timeout
	 * will make sure that an unresponsive client can't hang the server.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 1;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	timeo.tv_sec = 30;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		/* Timeout failures are also non-fatal. */
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
		error = 0;
	}

	slp = kalloc_type(struct nfsrv_sock, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_rw_init(&slp->ns_rwlock, &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
	lck_mtx_init(&slp->ns_wgmutex, &nfsrv_slp_mutex_group, LCK_ATTR_NULL);

	lck_mtx_lock(&nfsd_mutex);

	if (soprotocol == IPPROTO_UDP) {
		/* Re-check the single-UDP-socket invariants now that we hold the lock. */
		if (sodomain == AF_INET) {
			/* There should be only one UDP/IPv4 socket */
			if (nfsrv_udpsock) {
				lck_mtx_unlock(&nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return EEXIST;
			}
			nfsrv_udpsock = slp;
		}
		if (sodomain == AF_INET6) {
			/* There should be only one UDP/IPv6 socket */
			if (nfsrv_udp6sock) {
				lck_mtx_unlock(&nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return EEXIST;
			}
			nfsrv_udp6sock = slp;
		}
	}

	/* add the socket to the list */
	first = TAILQ_EMPTY(&nfsrv_socklist);
	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
	if (sotype == SOCK_STREAM) {
		nfsrv_sock_tcp_cnt++;
		/* Sanitize the sysctl-settable idle timeout before using it. */
		if (nfsrv_sock_idle_timeout < 0) {
			nfsrv_sock_idle_timeout = 0;
		}
		if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
			nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
		}
		/*
		 * Possibly start or stop the idle timer. We only start the idle timer when
		 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
		 * on then we may need to turn it off based on the nfsrv_sock_idle_timeout or
		 * the number of connections.
		 */
		if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
			if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
				/* Timer no longer needed (disabled or few enough sockets). */
				if (nfsrv_idlesock_timer_on) {
					thread_call_cancel(nfsrv_idlesock_timer_call);
					nfsrv_idlesock_timer_on = 0;
				}
			} else {
				struct nfsrv_sock *old_slp;
				struct timeval now;
				microuptime(&now);
				time_t time_to_wait = nfsrv_sock_idle_timeout;
				/*
				 * Get the oldest tcp socket and calculate the
				 * earliest time for the next idle timer to fire
				 * based on the possibly updated nfsrv_sock_idle_timeout
				 */
				TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
					if (old_slp->ns_sotype == SOCK_STREAM) {
						time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
						if (time_to_wait < 1) {
							time_to_wait = 1;
						}
						break;
					}
				}
				/*
				 * If we have a timer scheduled, but if its going to fire too late,
				 * turn it off.
				 */
				if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
					thread_call_cancel(nfsrv_idlesock_timer_call);
					nfsrv_idlesock_timer_on = 0;
				}
				/* Schedule the idle thread if it isn't already */
				if (!nfsrv_idlesock_timer_on) {
					nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
					/* Record the absolute deadline so later calls can compare. */
					nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
				}
			}
		}
	}

	sock_retain(so); /* grab a retain count on the socket */
	slp->ns_so = so;
	slp->ns_sotype = sotype;
	slp->ns_nam = mynam;
	slp->ns_sobufsize = sobufsize;

	/* set up the socket up-call */
	nfsrv_uc_addsock(slp, first);

	/* mark that the socket is not in the nfsrv_sockwg list */
	slp->ns_wgq.tqe_next = SLPNOLIST;

	slp->ns_flag = SLP_VALID | SLP_NEEDQ;

	/* Wake an idle nfsd to start servicing the new socket. */
	nfsrv_wakenfsd(slp);
	lck_mtx_unlock(&nfsd_mutex);

	return 0;
}
822 
823 /*
824  * nfssvc_nfsd()
825  *
826  * nfsd theory of operation:
827  *
828  * The first nfsd thread stays in user mode accepting new TCP connections
829  * which are then added via the "addsock" call.  The rest of the nfsd threads
830  * simply call into the kernel and remain there in a loop handling NFS
831  * requests until killed by a signal.
832  *
833  * There's a list of nfsd threads (nfsd_head).
834  * There's an nfsd queue that contains only those nfsds that are
835  *   waiting for work to do (nfsd_queue).
836  *
837  * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
838  *   managing the work on the sockets:
839  *   nfsrv_sockwait - sockets w/new data waiting to be worked on
840  *   nfsrv_sockwork - sockets being worked on which may have more work to do
841  *   nfsrv_sockwg -- sockets which have pending write gather data
842  * When a socket receives data, if it is not currently queued, it
843  *   will be placed at the end of the "wait" queue.
844  * Whenever a socket needs servicing we make sure it is queued and
845  *   wake up a waiting nfsd (if there is one).
846  *
847  * nfsds will service at most 8 requests from the same socket before
848  *   defecting to work on another socket.
849  * nfsds will defect immediately if there are any sockets in the "wait" queue
850  * nfsds looking for a socket to work on check the "wait" queue first and
851  *   then check the "work" queue.
852  * When an nfsd starts working on a socket, it removes it from the head of
853  *   the queue it's currently on and moves it to the end of the "work" queue.
854  * When nfsds are checking the queues for work, any sockets found not to
855  *   have any work are simply dropped from the queue.
856  *
857  */
/*
 * nfssvc_nfsd() - main loop of an NFS server daemon (nfsd) thread.
 *
 * Each thread registers itself on nfsd_head, then repeatedly picks a
 * server socket off the wait/work queues, dequeues RPC requests from it,
 * services them, and sends the replies.  The thread exits when the
 * server is shut down (nfsd_thread_max <= 0), when it is interrupted by
 * a signal, or when it wakes up idle and finds more threads running
 * than configured.
 *
 * Returns 0 on a normal exit, or an errno from msleep()/request
 * processing on an abnormal one.
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep = NULL;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	time_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	/* quiet "may be used uninitialized" warnings; both are set before use */
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	/* Allocate this thread's nfsd record and register it. */
	nfsd = kalloc_type(struct nfsd, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_mtx_lock(&nfsd_mutex);
	if (nfsd_thread_count++ == 0) {
		nfsrv_initcache();              /* Init the server request cache */
	}
	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(&nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(&nfsd_mutex);
			/* Sleep until some socket has work (or the 5s timeout fires). */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, &nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						/* still on the idle queue; pull ourselves off */
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					if (error == EWOULDBLOCK) {
						/* timed out: re-check thread count and queues */
						continue;
					}
					/* interrupted by a signal: shut this thread down */
					goto done;
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				microuptime(&now);
				slp->ns_timestamp = now.tv_sec;
				/* We keep the socket list in least recently used order for reaping idle sockets */
				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
				nfsd->nfsd_slp = slp;
				opcnt = 0;      /* requests serviced on this socket so far */
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(&nfsd_mutex);
			if (!slp) {
				continue;
			}
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
					/* pull queued socket data into the record list */
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN) {
					nfsrv_zapsock(slp);
				}
				/* dequeue one request (nd) from the socket */
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) {  // RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM) {
						nfsrv_zapsock(slp); // drop connection
					}
				}
				writes_todo = 0;
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					/* no new request, but gathered writes may be due */
					microuptime(&now);
					cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			/* error or dead socket: free any request and drop the socket */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp) {
				nfsrv_slpderef(slp);
			}
			if (nfsd_thread_max <= 0) {
				break;
			}
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			if (nd->nd_nam2) {
				nd->nd_nam = nd->nd_nam2;
			} else {
				nd->nd_nam = slp->ns_nam;
			}

			/* check the duplicate request cache */
			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET) {
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				} else if (saddr->sa_family == AF_INET6) {
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				}
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					/* non-reserved source port: reject with weak-auth error */
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}
		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				/* not in the dup cache: actually service the request */
				if (nd && (nd->nd_vers == NFS_VER3)) {
					procrastinate = nfsrv_wg_delay_v3;
				} else {
					procrastinate = nfsrv_wg_delay;
				}
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				} else {
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				}
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n",
						    nd->nd_procnum, error);
					}
					break;
				}
				if (error) {
					OSAddAtomic64(1, &nfsrvstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsrvstats.srvrpccntv3[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				OS_FALLTHROUGH;

			case RC_REPLY:
				/* send the (possibly cached) reply held in mrep */
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n", siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error) {
					panic("nfsd setrcvif failed: %d", error);
				}
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error) {
						/* high bit marks the final record fragment */
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
					}
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				if (error == EINTR || error == ERESTART) {
					/* this thread is being signalled; clean up and exit */
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr)) {
						kauth_cred_unref(&nd->nd_cr);
					}
					if (nd->nd_gss_context) {
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					}
					NFS_ZFREE(nfsrv_descript_zone, nd);
					nfsrv_slpderef(slp);
					lck_mtx_lock(&nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* duplicate of an in-progress request: just drop it */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			}
			;
			opcnt++;
			/* done with this request descriptor; release its resources */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/* if no other sockets are waiting, keep servicing this one (up to 8 ops) */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) {  // RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM) {
					nfsrv_zapsock(slp); // drop connection
				}
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(&nfsd_mutex);
done:
	/* reached with nfsd_mutex held */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	kfree_type(struct nfsd, nfsd);
	if (--nfsd_thread_count == 0) {
		/* last nfsd thread out tears down all server state */
		nfsrv_cleanup();
	}
	lck_mtx_unlock(&nfsd_mutex);
	return error;
}
1247 
1248 int
nfssvc_export(user_addr_t argp)1249 nfssvc_export(user_addr_t argp)
1250 {
1251 	int error = 0, is_64bit;
1252 	struct user_nfs_export_args unxa;
1253 	vfs_context_t ctx = vfs_context_current();
1254 
1255 	is_64bit = vfs_context_is64bit(ctx);
1256 
1257 	/* copy in pointers to path and export args */
1258 	if (is_64bit) {
1259 		error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1260 	} else {
1261 		struct nfs_export_args tnxa;
1262 		error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1263 		if (error == 0) {
1264 			/* munge into LP64 version of nfs_export_args structure */
1265 			unxa.nxa_fsid = tnxa.nxa_fsid;
1266 			unxa.nxa_expid = tnxa.nxa_expid;
1267 			unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1268 			unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1269 			unxa.nxa_flags = tnxa.nxa_flags;
1270 			unxa.nxa_netcount = tnxa.nxa_netcount;
1271 			unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1272 		}
1273 	}
1274 	if (error) {
1275 		return error;
1276 	}
1277 
1278 	error = nfsrv_export(&unxa, ctx);
1279 
1280 	return error;
1281 }
1282 
1283 int
nfssvc_exportstats(proc_t p,user_addr_t argp)1284 nfssvc_exportstats(proc_t p, user_addr_t argp)
1285 {
1286 	int error = 0;
1287 	uint pos;
1288 	struct nfs_exportfs *nxfs;
1289 	struct nfs_export *nx;
1290 	struct nfs_export_stat_desc stat_desc = {};
1291 	struct nfs_export_stat_rec statrec;
1292 	uint numExports, totlen, count;
1293 	size_t numRecs;
1294 	user_addr_t oldp, newlenp;
1295 	user_size_t oldlen, newlen;
1296 	struct user_iovec iov[2];
1297 
1298 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1299 	if (error) {
1300 		return error;
1301 	}
1302 
1303 	oldp = iov[0].iov_base;
1304 	oldlen = iov[0].iov_len;
1305 	newlenp = iov[1].iov_base;
1306 	newlen = iov[1].iov_len;
1307 
1308 	/* setup export stat descriptor */
1309 	stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
1310 
1311 	if (!nfsrv_is_initialized()) {
1312 		stat_desc.rec_count = 0;
1313 		if (oldp && (oldlen >= sizeof(struct nfs_export_stat_desc))) {
1314 			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1315 		}
1316 		size_t stat_desc_size = sizeof(struct nfs_export_stat_desc);
1317 		if (!error && newlenp && newlen >= sizeof(stat_desc_size)) {
1318 			error = copyout(&stat_desc_size, newlenp, sizeof(stat_desc_size));
1319 		}
1320 		return error;
1321 	}
1322 
1323 	/* Count the number of exported directories */
1324 	lck_rw_lock_shared(&nfsrv_export_rwlock);
1325 	numExports = 0;
1326 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
1327 	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
1328 	numExports += 1;
1329 
1330 	/* update stat descriptor's export record count */
1331 	stat_desc.rec_count = numExports;
1332 
1333 	/* calculate total size of required buffer */
1334 	totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
1335 
1336 	/* Check caller's buffer */
1337 	if (oldp == 0 || newlenp == 0) {
1338 		lck_rw_done(&nfsrv_export_rwlock);
1339 		/* indicate required buffer len */
1340 		if (newlenp && newlen >= sizeof(totlen)) {
1341 			error = copyout(&totlen, newlenp, sizeof(totlen));
1342 		}
1343 		return error;
1344 	}
1345 
1346 	/* We require the caller's buffer to be at least large enough to hold the descriptor */
1347 	if (oldlen < sizeof(struct nfs_export_stat_desc) || newlen < sizeof(totlen)) {
1348 		lck_rw_done(&nfsrv_export_rwlock);
1349 		/* indicate required buffer len */
1350 		if (newlenp && newlen >= sizeof(totlen)) {
1351 			(void)copyout(&totlen, newlenp, sizeof(totlen));
1352 		}
1353 		return ENOMEM;
1354 	}
1355 
1356 	/* indicate required buffer len */
1357 	error = copyout(&totlen, newlenp, sizeof(totlen));
1358 	if (error) {
1359 		lck_rw_done(&nfsrv_export_rwlock);
1360 		return error;
1361 	}
1362 
1363 	/* check if export table is empty */
1364 	if (!numExports) {
1365 		lck_rw_done(&nfsrv_export_rwlock);
1366 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1367 		return error;
1368 	}
1369 
1370 	/* calculate how many actual export stat records fit into caller's buffer */
1371 	numRecs = (totlen - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
1372 
1373 	if (!numRecs) {
1374 		/* caller's buffer can only accomodate descriptor */
1375 		lck_rw_done(&nfsrv_export_rwlock);
1376 		stat_desc.rec_count = 0;
1377 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1378 		return error;
1379 	}
1380 
1381 	/* adjust to actual number of records to copyout to caller's buffer */
1382 	if (numRecs > numExports) {
1383 		numRecs = numExports;
1384 	}
1385 
1386 	/* set actual number of records we are returning */
1387 	stat_desc.rec_count = numRecs;
1388 
1389 	/* first copy out the stat descriptor */
1390 	pos = 0;
1391 	error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
1392 	if (error) {
1393 		lck_rw_done(&nfsrv_export_rwlock);
1394 		return error;
1395 	}
1396 	pos += sizeof(struct nfs_export_stat_desc);
1397 
1398 	/* Loop through exported directories */
1399 	count = 0;
1400 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
1401 		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
1402 			if (count >= numRecs) {
1403 				break;
1404 			}
1405 
1406 			/* build exported filesystem path */
1407 			memset(statrec.path, 0, sizeof(statrec.path));
1408 			snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
1409 			    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
1410 			    nx->nx_path);
1411 
1412 			/* build the 64-bit export stat counters */
1413 			statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
1414 			    nx->nx_stats.ops.lo;
1415 			statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
1416 			    nx->nx_stats.bytes_read.lo;
1417 			statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
1418 			    nx->nx_stats.bytes_written.lo;
1419 			error = copyout(&statrec, oldp + pos, sizeof(statrec));
1420 			if (error) {
1421 				lck_rw_done(&nfsrv_export_rwlock);
1422 				return error;
1423 			}
1424 			/* advance buffer position */
1425 			pos += sizeof(statrec);
1426 		}
1427 	}
1428 	lck_rw_done(&nfsrv_export_rwlock);
1429 
1430 	return error;
1431 }
1432 
/*
 * nfssvc_userstats() - NFSSVC_USERSTATS: copy out per-user activity stats.
 *
 * argp points to a two-entry iovec: iov[0] describes the caller's result
 * buffer, iov[1] points to a length word updated with the total buffer
 * size required.  The result buffer receives a struct nfs_user_stat_desc
 * header followed by, for each export, one nfs_user_stat_path_rec and
 * then one nfs_user_stat_user_rec per active user -- as many records as
 * fit.  Size accounting (bytes_total) continues even after the buffer is
 * exhausted so the full required size can still be reported.
 */
int
nfssvc_userstats(proc_t p, user_addr_t argp)
{
	int error = 0;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;
	struct nfs_active_user_list *ulist;
	struct nfs_user_stat_desc ustat_desc = {};
	struct nfs_user_stat_node *unode, *unode_next;
	struct nfs_user_stat_user_rec ustat_rec;
	struct nfs_user_stat_path_rec upath_rec;
	uint bytes_total, recs_copied, pos;
	size_t bytes_avail;
	user_addr_t oldp, newlenp;
	user_size_t oldlen, newlen;
	struct user_iovec iov[2];

	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
	if (error) {
		return error;
	}

	oldp = iov[0].iov_base;         /* caller's result buffer */
	oldlen = iov[0].iov_len;
	newlenp = iov[1].iov_base;      /* required-length word */
	newlen = iov[1].iov_len;

	/* init structures used for copying out of kernel */
	ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
	ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
	upath_rec.rec_type = NFS_USER_STAT_PATH_REC;

	/* initialize counters */
	bytes_total = sizeof(struct nfs_user_stat_desc);
	bytes_avail  = oldlen;
	recs_copied = 0;

	if (!nfsrv_is_initialized()) { /* NFS server not initialized, so no stats */
		goto ustat_skip;
	}

	/* reclaim old expired user nodes */
	nfsrv_active_user_list_reclaim();

	/* reserve space for the buffer descriptor */
	if (bytes_avail >= sizeof(struct nfs_user_stat_desc)) {
		bytes_avail -= sizeof(struct nfs_user_stat_desc);
	} else {
		bytes_avail = 0;
	}

	/* put buffer position past the buffer descriptor */
	pos = sizeof(struct nfs_user_stat_desc);

	/* Loop through exported directories */
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
			/* copy out path */
			if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
				memset(upath_rec.path, 0, sizeof(upath_rec.path));
				snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
				    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
				    nx->nx_path);

				error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
				if (error) {
					/* punt */
					goto ustat_done;
				}

				pos += sizeof(struct nfs_user_stat_path_rec);
				bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
				recs_copied++;
			} else {
				/* Caller's buffer is exhausted */
				bytes_avail = 0;
			}

			/* still tally the space this record would have needed */
			bytes_total += sizeof(struct nfs_user_stat_path_rec);

			/* Scan through all user nodes of this export */
			ulist = &nx->nx_user_list;
			lck_mtx_lock(&ulist->user_mutex);
			for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
				unode_next = TAILQ_NEXT(unode, lru_link);

				/* copy out node if there is space */
				if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
					/* prepare a user stat rec for copying out */
					ustat_rec.uid = unode->uid;
					memset(&ustat_rec.sock, 0, sizeof(ustat_rec.sock));
					bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
					ustat_rec.ops = unode->ops;
					ustat_rec.bytes_read = unode->bytes_read;
					ustat_rec.bytes_written = unode->bytes_written;
					ustat_rec.tm_start = unode->tm_start;
					ustat_rec.tm_last = unode->tm_last;

					error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));

					if (error) {
						/* punt */
						lck_mtx_unlock(&ulist->user_mutex);
						goto ustat_done;
					}

					pos += sizeof(struct nfs_user_stat_user_rec);
					bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
					recs_copied++;
				} else {
					/* Caller's buffer is exhausted */
					bytes_avail = 0;
				}
				bytes_total += sizeof(struct nfs_user_stat_user_rec);
			}
			/* can unlock this export's list now */
			lck_mtx_unlock(&ulist->user_mutex);
		}
	}

ustat_done:
	/* unlock the export table */
	lck_rw_done(&nfsrv_export_rwlock);

ustat_skip:
	/* indicate number of actual records copied */
	ustat_desc.rec_count = recs_copied;

	if (!error) {
		/* check if there was enough room for the buffer descriptor */
		if (oldlen >= sizeof(struct nfs_user_stat_desc)) {
			error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
		} else {
			error = ENOMEM;
		}

		/* always indicate required buffer size */
		if (!error && newlenp && newlen >= sizeof(bytes_total)) {
			error = copyout(&bytes_total, newlenp, sizeof(bytes_total));
		}
	}
	return error;
}
1577 
1578 int
nfssvc_usercount(proc_t p,user_addr_t argp)1579 nfssvc_usercount(proc_t p, user_addr_t argp)
1580 {
1581 	int error;
1582 	user_addr_t oldp, newlenp;
1583 	user_size_t oldlen, newlen;
1584 	struct user_iovec iov[2];
1585 	size_t stat_size = sizeof(nfsrv_user_stat_node_count);
1586 
1587 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1588 	if (error) {
1589 		return error;
1590 	}
1591 
1592 	oldp = iov[0].iov_base;
1593 	oldlen = iov[0].iov_len;
1594 	newlenp = iov[1].iov_base;
1595 	newlen = iov[1].iov_len;
1596 
1597 	if (!oldp) {
1598 		if (newlenp && newlen >= sizeof(stat_size)) {
1599 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1600 		}
1601 		return error;
1602 	}
1603 
1604 	if (oldlen < stat_size) {
1605 		if (newlenp && newlen >= sizeof(stat_size)) {
1606 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1607 		}
1608 		return ENOMEM;
1609 	}
1610 
1611 	if (nfsrv_is_initialized()) {
1612 		/* reclaim old expired user nodes */
1613 		nfsrv_active_user_list_reclaim();
1614 	}
1615 
1616 	error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
1617 
1618 	return error;
1619 }
1620 
1621 int
nfssvc_zerostats(void)1622 nfssvc_zerostats(void)
1623 {
1624 	bzero(&nfsrvstats, sizeof nfsrvstats);
1625 	return 0;
1626 }
1627 
1628 int
nfssvc_srvstats(proc_t p,user_addr_t argp)1629 nfssvc_srvstats(proc_t p, user_addr_t argp)
1630 {
1631 	int error;
1632 	user_addr_t oldp, newlenp;
1633 	user_size_t oldlen, newlen;
1634 	struct user_iovec iov[2];
1635 	size_t stat_size = sizeof(nfsrvstats);
1636 
1637 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1638 	if (error) {
1639 		return error;
1640 	}
1641 
1642 	oldp = iov[0].iov_base;
1643 	oldlen = iov[0].iov_len;
1644 	newlenp = iov[1].iov_base;
1645 	newlen = iov[1].iov_len;
1646 
1647 	if (!oldp) {
1648 		if (newlenp && newlen >= sizeof(stat_size)) {
1649 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1650 		}
1651 		return error;
1652 	}
1653 
1654 	if (oldlen < stat_size) {
1655 		if (newlenp && newlen >= sizeof(stat_size)) {
1656 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1657 		}
1658 		return ENOMEM;
1659 	}
1660 
1661 	error = copyout(&nfsrvstats, oldp, stat_size);
1662 	if (error) {
1663 		return error;
1664 	}
1665 
1666 	return 0;
1667 }
1668 
1669 /*
1670  * Shut down a socket associated with an nfsrv_sock structure.
1671  * Should be called with the send lock set, if required.
1672  * The trick here is to increment the sref at the start, so that the nfsds
1673  * will stop using it and clear ns_flag at the end so that it will not be
1674  * reassigned during cleanup.
1675  */
void
nfsrv_zapsock(struct nfsrv_sock *slp)
{
	socket_t so;

	/* nothing to do if the socket has already been invalidated */
	if ((slp->ns_flag & SLP_VALID) == 0) {
		return;
	}
	/* clear SLP_VALID (and all other flags) so nfsds stop using it */
	slp->ns_flag &= ~SLP_ALLFLAGS;

	so = slp->ns_so;
	if (so == NULL) {
		return;
	}

	/* disable the socket up-call before shutting the socket down */
	sock_setupcall(so, NULL, NULL);
	sock_shutdown(so, SHUT_RDWR);

	/*
	 * Remove from the up-call queue
	 */
	nfsrv_uc_dequeue(slp);
}
1699 
1700 /*
1701  * cleanup and release a server socket structure.
1702  */
void
nfsrv_slpfree(struct nfsrv_sock *slp)
{
	struct nfsrv_descript *nwp, *nnwp;

	/* release the socket itself */
	if (slp->ns_so) {
		sock_release(slp->ns_so);
		slp->ns_so = NULL;
	}
	/* free buffered mbufs: peer address, raw data, records, partial fragment */
	if (slp->ns_nam) {
		mbuf_free(slp->ns_nam);
	}
	if (slp->ns_raw) {
		mbuf_freem(slp->ns_raw);
	}
	if (slp->ns_rec) {
		mbuf_freem(slp->ns_rec);
	}
	if (slp->ns_frag) {
		mbuf_freem(slp->ns_frag);
	}
	slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
	slp->ns_reccnt = 0;

	/* free any request descriptors still queued on this socket */
	for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
		nnwp = nwp->nd_tq.le_next;
		LIST_REMOVE(nwp, nd_tq);
		nfsm_chain_cleanup(&nwp->nd_nmreq);
		if (nwp->nd_mrep) {
			mbuf_freem(nwp->nd_mrep);
		}
		if (nwp->nd_nam2) {
			mbuf_freem(nwp->nd_nam2);
		}
		if (IS_VALID_CRED(nwp->nd_cr)) {
			kauth_cred_unref(&nwp->nd_cr);
		}
		if (nwp->nd_gss_context) {
			nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
		}
		NFS_ZFREE(nfsrv_descript_zone, nwp);
	}
	LIST_INIT(&slp->ns_tq);

	/* destroy the socket's locks and free the structure itself */
	lck_rw_destroy(&slp->ns_rwlock, &nfsrv_slp_rwlock_group);
	lck_mtx_destroy(&slp->ns_wgmutex, &nfsrv_slp_mutex_group);
	kfree_type(struct nfsrv_sock, slp);
}
1751 
1752 /*
1753  * Derefence a server socket structure. If it has no more references and
1754  * is no longer valid, you can throw it away.
1755  */
/* Drop one reference; caller must hold nfsd_mutex. */
static void
nfsrv_slpderef_locked(struct nfsrv_sock *slp)
{
	lck_rw_lock_exclusive(&slp->ns_rwlock);
	slp->ns_sref--;

	if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
		/* still referenced or still valid: keep the socket around */
		if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
			/* remove socket from queue since there's no work */
			if (slp->ns_flag & SLP_WAITQ) {
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			} else {
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			}
			slp->ns_flag &= ~SLP_QUEUED;
		}
		lck_rw_done(&slp->ns_rwlock);
		return;
	}

	/* This socket is no longer valid, so we'll get rid of it */

	/* unlink from whichever service queue it may be on */
	if (slp->ns_flag & SLP_QUEUED) {
		if (slp->ns_flag & SLP_WAITQ) {
			TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
		} else {
			TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
		}
		slp->ns_flag &= ~SLP_QUEUED;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* unlink from the global socket list */
	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
	if (slp->ns_sotype == SOCK_STREAM) {
		nfsrv_sock_tcp_cnt--;
	}

	/* now remove from the write gather socket list */
	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
		slp->ns_wgq.tqe_next = SLPNOLIST;
	}
	nfsrv_slpfree(slp);
}
1800 
/* Drop one reference on a server socket, taking nfsd_mutex for the caller. */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	lck_mtx_lock(&nfsd_mutex);
	nfsrv_slpderef_locked(slp);
	lck_mtx_unlock(&nfsd_mutex);
}
1808 
1809 /*
1810  * Check periodically for idle sockest if needed and
1811  * zap them.
1812  */
void
nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
{
	struct nfsrv_sock *slp, *tslp;
	struct timeval now;
	time_t time_to_wait = nfsrv_sock_idle_timeout;

	microuptime(&now);
	lck_mtx_lock(&nfsd_mutex);

	/* Turn off the timer if we're supposed to and get out */
	if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
		nfsrv_sock_idle_timeout = 0;
	}
	if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
		/* few enough TCP sockets (or reaping disabled): stop the timer */
		nfsrv_idlesock_timer_on = 0;
		lck_mtx_unlock(&nfsd_mutex);
		return;
	}

	TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
		lck_rw_lock_exclusive(&slp->ns_rwlock);
		/* Skip udp and referenced sockets */
		if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
			lck_rw_done(&slp->ns_rwlock);
			continue;
		}
		/*
		 * If this is the first non-referenced socket that hasn't idled out,
		 * use its time stamp to calculate the earliest time in the future
		 * to start the next invocation of the timer. Since the nfsrv_socklist
		 * is sorted oldest access to newest. Once we find the first one,
		 * we're done and break out of the loop.
		 */
		if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
		    nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
			time_to_wait -= now.tv_sec - slp->ns_timestamp;
			if (time_to_wait < 1) {
				time_to_wait = 1;
			}
			lck_rw_done(&slp->ns_rwlock);
			break;
		}
		/*
		 * Bump the ref count. nfsrv_slpderef below will destroy
		 * the socket, since nfsrv_zapsock has closed it.
		 */
		slp->ns_sref++;
		nfsrv_zapsock(slp);
		lck_rw_done(&slp->ns_rwlock);
		nfsrv_slpderef_locked(slp);
	}

	/* Start ourself back up */
	nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
	/* Remember when the next timer will fire for nfssvc_addsock. */
	nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
	lck_mtx_unlock(&nfsd_mutex);
}
1872 
1873 /*
1874  * Clean up the data structures for the server.
1875  */
1876 void
nfsrv_cleanup(void)1877 nfsrv_cleanup(void)
1878 {
1879 	struct nfsrv_sock *slp, *nslp;
1880 	struct timeval now;
1881 #if CONFIG_FSE
1882 	struct nfsrv_fmod *fp, *nfp;
1883 	int i;
1884 #endif
1885 
1886 	microuptime(&now);
1887 	for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1888 		nslp = TAILQ_NEXT(slp, ns_chain);
1889 		lck_rw_lock_exclusive(&slp->ns_rwlock);
1890 		slp->ns_sref++;
1891 		if (slp->ns_flag & SLP_VALID) {
1892 			nfsrv_zapsock(slp);
1893 		}
1894 		lck_rw_done(&slp->ns_rwlock);
1895 		nfsrv_slpderef_locked(slp);
1896 	}
1897 #
1898 #if CONFIG_FSE
1899 	/*
1900 	 * Flush pending file write fsevents
1901 	 */
1902 	lck_mtx_lock(&nfsrv_fmod_mutex);
1903 	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1904 		for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1905 			/*
1906 			 * Fire off the content modified fsevent for each
1907 			 * entry, remove it from the list, and free it.
1908 			 */
1909 			if (nfsrv_fsevents_enabled) {
1910 				fp->fm_context.vc_thread = current_thread();
1911 				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1912 				    FSE_ARG_VNODE, fp->fm_vp,
1913 				    FSE_ARG_DONE);
1914 			}
1915 			vnode_put(fp->fm_vp);
1916 			kauth_cred_unref(&fp->fm_context.vc_ucred);
1917 			nfp = LIST_NEXT(fp, fm_link);
1918 			LIST_REMOVE(fp, fm_link);
1919 			kfree_type(struct nfsrv_fmod, fp);
1920 		}
1921 	}
1922 	nfsrv_fmod_pending = 0;
1923 	lck_mtx_unlock(&nfsrv_fmod_mutex);
1924 #endif
1925 
1926 	nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
1927 
1928 	nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
1929 
1930 	nfsrv_cleancache();     /* And clear out server cache */
1931 
1932 	nfsrv_udpsock = NULL;
1933 	nfsrv_udp6sock = NULL;
1934 }
1935 
1936 #endif /* CONFIG_NFS_SERVER */
1937