xref: /xnu-8796.141.3/bsd/nfs/nfs_syscalls.c (revision 1b191cb58250d0705d8a51287127505aa4bc0789)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1989, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Rick Macklem at The University of Guelph.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
65  * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66  */
67 
68 #include <nfs/nfs_conf.h>
69 
70 /*
71  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72  * support for mandatory and extensible security protections.  This notice
73  * is included in support of clause 2.2 (b) of the Apple Public License,
74  * Version 2.0.
75  */
76 
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/uio_internal.h>
80 #include <sys/sysctl.h>
81 #include <sys/socketvar.h>
82 #include <sys/sysproto.h>
83 #include <sys/fsevents.h>
84 #include <kern/task.h>
85 
86 #include <security/audit/audit.h>
87 
88 #include <netinet/in.h>
89 #include <netinet/tcp.h>
90 #include <nfs/xdr_subs.h>
91 #include <nfs/rpcv2.h>
92 #include <nfs/nfsproto.h>
93 #include <nfs/nfs.h>
94 #include <nfs/nfsm_subs.h>
95 #include <nfs/nfsrvcache.h>
96 #include <nfs/nfs_gss.h>
97 #if CONFIG_MACF
98 #include <security/mac_framework.h>
99 #endif
100 
101 #if CONFIG_NFS_SERVER
102 
103 extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
104 
105 extern int nfsrv_wg_delay;
106 extern int nfsrv_wg_delay_v3;
107 
108 static int nfsrv_require_resv_port = 0;
109 static time_t  nfsrv_idlesock_timer_on = 0;
110 static int nfsrv_sock_tcp_cnt = 0;
111 #define NFSD_MIN_IDLE_TIMEOUT 30
112 static int nfsrv_sock_idle_timeout = 3600; /* One hour */
113 
114 int     nfssvc_export(user_addr_t argp);
115 int     nfssvc_exportstats(proc_t p, user_addr_t argp);
116 int     nfssvc_userstats(proc_t p, user_addr_t argp);
117 int     nfssvc_usercount(proc_t p, user_addr_t argp);
118 int     nfssvc_zerostats(void);
119 int     nfssvc_srvstats(proc_t p, user_addr_t argp);
120 int     nfssvc_nfsd(void);
121 int     nfssvc_addsock(socket_t, mbuf_t);
122 void    nfsrv_zapsock(struct nfsrv_sock *);
123 void    nfsrv_slpderef(struct nfsrv_sock *);
124 void    nfsrv_slpfree(struct nfsrv_sock *);
125 
126 #endif /* CONFIG_NFS_SERVER */
127 
128 /*
129  * sysctl stuff
130  */
131 SYSCTL_DECL(_vfs_generic);
/* vfs.generic.nfs: parent node for all NFS sysctls. */
132 SYSCTL_EXTENSIBLE_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
133 
134 #if CONFIG_NFS_SERVER
/* vfs.generic.nfs.server: NFS server tunables and read-only counters. */
135 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
136 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
137 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
138 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
139 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
140 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
141 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
142 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
143 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
144 SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
145 SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_debug_ctl, 0, "");
146 #if CONFIG_FSE
147 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
148 #endif
/* Thread-pool and TCP-connection counters; *_count/_connections are read-only. */
149 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
150 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
151 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
152 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
153 #ifdef NFS_UC_Q_DEBUG
154 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
155 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
156 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
157 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
158 #endif
159 #endif /* CONFIG_NFS_SERVER */
160 
161 /* NFS hooks */
162 
163 /* NFS hooks variables */
/*
 * Inbound NFS hook table.  Starts out empty (all NULL); populated by
 * nfs_register_hooks() and cleared again by nfs_unregister_hooks().
 */
164 struct nfs_hooks_in nfsh = {
165 	.f_vinvalbuf      = NULL,
166 	.f_buf_page_inval = NULL
167 };
168 
169 /* NFS hooks registration functions */
170 void
nfs_register_hooks(struct nfs_hooks_in * inh,struct nfs_hooks_out * outh)171 nfs_register_hooks(struct nfs_hooks_in *inh, struct nfs_hooks_out *outh)
172 {
173 	if (inh) {
174 		nfsh.f_vinvalbuf = inh->f_vinvalbuf;
175 		nfsh.f_buf_page_inval = inh->f_buf_page_inval;
176 	}
177 
178 	if (outh) {
179 		outh->f_get_bsdthreadtask_info = get_bsdthreadtask_info;
180 	}
181 }
182 
183 void
nfs_unregister_hooks(void)184 nfs_unregister_hooks(void)
185 {
186 	memset(&nfsh, 0, sizeof(nfsh));
187 }
188 
189 /* NFS hooks wrappers */
190 int
nfs_vinvalbuf(vnode_t vp,int flags,vfs_context_t ctx,int intrflg)191 nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
192 {
193 	if (nfsh.f_vinvalbuf == NULL) {
194 		return 0;
195 	}
196 
197 	return nfsh.f_vinvalbuf(vp, flags, ctx, intrflg);
198 }
199 
200 int
nfs_buf_page_inval(vnode_t vp,off_t offset)201 nfs_buf_page_inval(vnode_t vp, off_t offset)
202 {
203 	if (nfsh.f_buf_page_inval == NULL) {
204 		return 0;
205 	}
206 
207 	return nfsh.f_buf_page_inval(vp, offset);
208 }
209 
210 #if !CONFIG_NFS_SERVER
211 #define __no_nfs_server_unused      __unused
212 #else
213 #define __no_nfs_server_unused      /* nothing */
214 #endif
215 
216 /*
217  * NFS server system calls
218  * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
219  */
220 
221 #if CONFIG_NFS_SERVER
222 static struct nfs_exportfs *
nfsrv_find_exportfs(const char * ptr)223 nfsrv_find_exportfs(const char *ptr)
224 {
225 	struct nfs_exportfs *nxfs;
226 
227 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
228 		if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
229 			break;
230 		}
231 	}
232 	if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
233 		nxfs = NULL;
234 	}
235 
236 	return nxfs;
237 }
238 
/*
 * Return the portion of "path" that follows the exported file system's
 * mount path "nxfs_path", or NULL if "path" does not lie under it.
 * If a direct prefix comparison fails, nxfs_path may be a firmlink
 * path: resolve the file system root to its physical path and retry.
 */
239 static char *
nfsrv_export_remainder(char * path,char * nxfs_path)240 nfsrv_export_remainder(char *path, char *nxfs_path)
241 {
242 	int error;
243 	vnode_t vp, rvp;
244 	struct nameidata nd;
245 	size_t pathbuflen = MAXPATHLEN;
246 	char real_mntonname[MAXPATHLEN];
247 
	/* Fast path: nxfs_path is a literal prefix of path. */
248 	if (!strncmp(path, nxfs_path, strlen(nxfs_path))) {
249 		return path + strlen(nxfs_path);
250 	}
251 
	/* Look up the mount path to get a vnode for its file system. */
252 	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
253 	    UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs_path), vfs_context_current());
254 	error = namei(&nd);
255 	if (error) {
256 		return NULL;
257 	}
258 
259 	nameidone(&nd);
260 	vp = nd.ni_vp;
261 
	/* Get the file system root; drop our iocount on vp either way. */
262 	error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
263 	vnode_put(vp);
264 	if (error) {
265 		return NULL;
266 	}
267 
	/* Resolve the root to its physical (non-firmlink) path. */
268 	error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen, VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
269 	vnode_put(rvp);
270 
271 	if (error || strncmp(path, real_mntonname, strlen(real_mntonname))) {
272 		return NULL;
273 	}
274 
275 	return path + strlen(real_mntonname);
276 }
277 /*
278  * Get file handle system call
279  */
/*
 * getfh() -- translate a pathname within an NFS export into an NFS file
 * handle suitable for fhopen().  Caller must be superuser and the NFS
 * server must be initialized.  The handle is copied out to uap->fhp.
 */
280 int
getfh(proc_t p __no_nfs_server_unused,struct getfh_args * uap __no_nfs_server_unused,__unused int * retval)281 getfh(
282 	proc_t p __no_nfs_server_unused,
283 	struct getfh_args *uap __no_nfs_server_unused,
284 	__unused int *retval)
285 {
286 	vnode_t vp;
287 	struct nfs_filehandle nfh;
288 	int error, fhlen = 0, fidlen;
289 	struct nameidata nd;
290 	char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
291 	size_t pathlen;
292 	struct nfs_exportfs *nxfs;
293 	struct nfs_export *nx;
294 
295 	/*
296 	 * Must be super user
297 	 */
298 	error = proc_suser(p);
299 	if (error) {
300 		return error;
301 	}
302 
	/* Copy in the pathname and the caller-requested handle size. */
303 	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
304 	if (!error) {
305 		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
306 	}
307 	if (error) {
308 		return error;
309 	}
310 	/* limit fh size to length specified (or v3 size by default) */
311 	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
312 		fhlen = NFSV3_MAX_FH_SIZE;
313 	}
314 	fidlen = fhlen - sizeof(struct nfs_exphandle);
315 
316 	if (!nfsrv_is_initialized()) {
317 		return EINVAL;
318 	}
319 
	/* Look up the target vnode; we get it back with an iocount held. */
320 	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
321 	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
322 	error = namei(&nd);
323 	if (error) {
324 		return error;
325 	}
326 	nameidone(&nd);
327 
328 	vp = nd.ni_vp;
329 
330 	// find exportfs that matches f_mntonname
331 	lck_rw_lock_shared(&nfsrv_export_rwlock);
332 	ptr = vfs_statfs(vnode_mount(vp))->f_mntonname;
333 	if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
334 		/*
335 		 * The f_mntonname might be a firmlink path.  Resolve
336 		 * it into a physical path and try again.
337 		 */
338 		size_t pathbuflen = MAXPATHLEN;
339 		vnode_t rvp;
340 
341 		error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
342 		if (error) {
343 			goto out;
344 		}
345 		error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
346 		    VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
347 		vnode_put(rvp);
348 		if (error) {
349 			goto out;
350 		}
351 		ptr = real_mntonname;
352 		nxfs = nfsrv_find_exportfs(ptr);
353 	}
354 	if (nxfs == NULL) {
355 		error = EINVAL;
356 		goto out;
357 	}
358 	// find export that best matches remainder of path
359 	if ((ptr = nfsrv_export_remainder(path, nxfs->nxfs_path)) == NULL) {
360 		error = EINVAL;
361 		goto out;
362 	}
363 
	/* Skip any leading slashes on the remaining path. */
364 	while (*ptr && (*ptr == '/')) {
365 		ptr++;
366 	}
367 	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
368 		size_t len = strlen(nx->nx_path);
369 		if (len == 0) { // we've hit the export entry for the root directory
370 			break;
371 		}
372 		if (!strncmp(nx->nx_path, ptr, len)) {
373 			break;
374 		}
375 	}
376 	if (!nx) {
377 		error = EINVAL;
378 		goto out;
379 	}
380 
	/* Build the handle: export header (nxh) plus fs-specific file id. */
381 	bzero(&nfh, sizeof(nfh));
382 	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
383 	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
384 	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
385 	nfh.nfh_xh.nxh_flags = 0;
386 	nfh.nfh_xh.nxh_reserved = 0;
387 	nfh.nfh_len = fidlen;
388 	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
	/* Fail if the file id didn't fit in the space the caller allowed. */
389 	if (nfh.nfh_len > (uint32_t)fidlen) {
390 		error = EOVERFLOW;
391 	}
392 	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
393 	nfh.nfh_len += sizeof(nfh.nfh_xh);
394 	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
395 
396 out:
	/* Common exit: release the export lock and the vnode iocount. */
397 	lck_rw_done(&nfsrv_export_rwlock);
398 	vnode_put(vp);
399 	if (error) {
400 		return error;
401 	}
402 	/*
403 	 * At first blush, this may appear to leak a kernel stack
404 	 * address, but the copyout() never reaches &nfh.nfh_fhp
405 	 * (sizeof(fhandle_t) < sizeof(nfh)).
406 	 */
407 	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
408 	return error;
409 }
410 
411 extern const struct fileops vnops;
412 
413 /*
414  * syscall for the rpc.lockd to use to translate a NFS file handle into
415  * an open descriptor.
416  *
417  * warning: do not remove the suser() call or this becomes one giant
418  * security hole.
419  */
/*
 * fhopen() -- open a file by NFS file handle (used by rpc.lockd).
 * Superuser only.  On success, returns the new file descriptor index
 * through *retval.
 */
420 int
fhopen(proc_t p __no_nfs_server_unused,struct fhopen_args * uap __no_nfs_server_unused,int32_t * retval __no_nfs_server_unused)421 fhopen(proc_t p __no_nfs_server_unused,
422     struct fhopen_args *uap __no_nfs_server_unused,
423     int32_t *retval __no_nfs_server_unused)
424 {
425 	vnode_t vp;
426 	struct nfs_filehandle nfh;
427 	struct nfs_export *nx;
428 	struct nfs_export_options *nxo;
429 	struct flock lf;
430 	struct fileproc *fp, *nfp;
431 	int fmode, error, type;
432 	int indx;
433 	vfs_context_t ctx = vfs_context_current();
434 	kauth_action_t action;
435 
436 	/*
437 	 * Must be super user
438 	 */
439 	error = suser(vfs_context_ucred(ctx), 0);
440 	if (error) {
441 		return error;
442 	}
443 
444 	if (!nfsrv_is_initialized()) {
445 		return EINVAL;
446 	}
447 
448 	fmode = FFLAGS(uap->flags);
449 	/* why not allow a non-read/write open for our lockd? */
450 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
451 		return EINVAL;
452 	}
453 
	/* Copy in the handle length first and validate it before reading the rest. */
454 	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
455 	if (error) {
456 		return error;
457 	}
458 	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
459 	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
460 		return EINVAL;
461 	}
462 	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
463 	if (error) {
464 		return error;
465 	}
466 	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
467 
468 	lck_rw_lock_shared(&nfsrv_export_rwlock);
469 	/* now give me my vnode, it gets returned to me with a reference */
470 	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
471 	lck_rw_done(&nfsrv_export_rwlock);
472 	if (error) {
473 		if (error == NFSERR_TRYLATER) {
474 			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
475 		}
476 		return error;
477 	}
478 
479 	/*
480 	 * From now on we have to make sure not
481 	 * to forget about the vnode.
482 	 * Any error that causes an abort must vnode_put(vp).
483 	 * Just set error = err and 'goto bad;'.
484 	 */
485 
486 	/*
487 	 * from vn_open
488 	 */
489 	if (vnode_vtype(vp) == VSOCK) {
490 		error = EOPNOTSUPP;
491 		goto bad;
492 	}
493 
494 	/* disallow write operations on directories */
495 	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
496 		error = EISDIR;
497 		goto bad;
498 	}
499 
500 #if CONFIG_MACF
501 	if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
502 		goto bad;
503 	}
504 #endif
505 
506 	/* compute action to be authorized */
507 	action = 0;
508 	if (fmode & FREAD) {
509 		action |= KAUTH_VNODE_READ_DATA;
510 	}
511 	if (fmode & (FWRITE | O_TRUNC)) {
512 		action |= KAUTH_VNODE_WRITE_DATA;
513 	}
514 	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
515 		goto bad;
516 	}
517 
518 	if ((error = VNOP_OPEN(vp, fmode, ctx))) {
519 		goto bad;
520 	}
521 	if ((error = vnode_ref_ext(vp, fmode, 0))) {
522 		goto bad;
523 	}
524 
525 	/*
526 	 * end of vn_open code
527 	 */
528 
529 	// starting here... error paths should call vn_close/vnode_put
530 	if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
531 		vn_close(vp, fmode & FMASK, ctx);
532 		goto bad;
533 	}
534 	fp = nfp;
535 
	/* Wire the vnode into the new file descriptor. */
536 	fp->fp_glob->fg_flag = fmode & FMASK;
537 	fp->fp_glob->fg_ops = &vnops;
538 	fp_set_data(fp, vp);
539 
540 	// XXX do we really need to support this with fhopen()?
541 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
542 		lf.l_whence = SEEK_SET;
543 		lf.l_start = 0;
544 		lf.l_len = 0;
545 		if (fmode & O_EXLOCK) {
546 			lf.l_type = F_WRLCK;
547 		} else {
548 			lf.l_type = F_RDLCK;
549 		}
550 		type = F_FLOCK;
551 		if ((fmode & FNONBLOCK) == 0) {
552 			type |= F_WAIT;
553 		}
554 		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
555 			struct vfs_context context = *vfs_context_current();
556 			/* Modify local copy (to not damage thread copy) */
557 			context.vc_ucred = fp->fp_glob->fg_cred;
558 
			/* Locking failed: close with the fileglob's credential and free the fd. */
559 			vn_close(vp, fp->fp_glob->fg_flag, &context);
560 			fp_free(p, indx, fp);
561 			goto bad;
562 		}
563 		fp->fp_glob->fg_flag |= FWASLOCKED;
564 	}
565 
566 	vnode_put(vp);
567 
	/* Publish the descriptor in the process fd table and return its index. */
568 	proc_fdlock(p);
569 	procfdtbl_releasefd(p, indx, NULL);
570 	fp_drop(p, indx, fp, 1);
571 	proc_fdunlock(p);
572 
573 	*retval = indx;
574 	return 0;
575 
576 bad:
577 	vnode_put(vp);
578 	return error;
579 }
580 
581 /*
582  * NFS server pseudo system call
583  */
/*
 * nfssvc() -- NFS server pseudo system call.  Dispatches on uap->flag:
 * add a server socket, run an nfsd thread, load exports, or query/reset
 * server statistics.  NFSD/ADDSOCK require superuser.
 */
584 int
nfssvc(proc_t p __no_nfs_server_unused,struct nfssvc_args * uap __no_nfs_server_unused,__unused int * retval)585 nfssvc(proc_t p __no_nfs_server_unused,
586     struct nfssvc_args *uap __no_nfs_server_unused,
587     __unused int *retval)
588 {
589 	mbuf_t nam;
590 	struct user_nfsd_args user_nfsdarg;
591 	socket_t so;
592 	int error;
593 
594 	AUDIT_ARG(cmd, uap->flag);
595 
596 	/*
597 	 * Must be super user for NFSSVC_NFSD and NFSSVC_ADDSOCK operations.
598 	 */
599 	if ((uap->flag & (NFSSVC_NFSD | NFSSVC_ADDSOCK)) && ((error = proc_suser(p)))) {
600 		return error;
601 	}
602 #if CONFIG_MACF
603 	error = mac_system_check_nfsd(kauth_cred_get());
604 	if (error) {
605 		return error;
606 	}
607 #endif
608 
609 	/* make sure NFS server data structures have been initialized */
610 	nfsrv_init();
611 
612 	if (uap->flag & NFSSVC_ADDSOCK) {
		/* Copy in nfsd_args, converting from the 32-bit layout if needed. */
613 		if (IS_64BIT_PROCESS(p)) {
614 			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
615 		} else {
616 			struct nfsd_args    tmp_args;
617 			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
618 			if (error == 0) {
619 				user_nfsdarg.sock = tmp_args.sock;
620 				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
621 				user_nfsdarg.namelen = tmp_args.namelen;
622 			}
623 		}
624 		if (error) {
625 			return error;
626 		}
627 		/* get the socket */
628 		error = file_socket(user_nfsdarg.sock, &so);
629 		if (error) {
630 			return error;
631 		}
632 		/* Get the client address for connected sockets. */
633 		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
634 			nam = NULL;
635 		} else {
636 			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
637 			if (error) {
638 				/* drop the iocount file_socket() grabbed on the file descriptor */
639 				file_drop(user_nfsdarg.sock);
640 				return error;
641 			}
642 		}
643 		/*
644 		 * nfssvc_addsock() will grab a retain count on the socket
645 		 * to keep the socket from being closed when nfsd closes its
646 		 * file descriptor for it.
647 		 */
648 		error = nfssvc_addsock(so, nam);
649 		/* drop the iocount file_socket() grabbed on the file descriptor */
650 		file_drop(user_nfsdarg.sock);
651 	} else if (uap->flag & NFSSVC_NFSD) {
652 		error = nfssvc_nfsd();
653 	} else if (uap->flag & NFSSVC_EXPORT) {
654 		error = nfssvc_export(uap->argp);
655 	} else if (uap->flag & NFSSVC_EXPORTSTATS) {
656 		error = nfssvc_exportstats(p, uap->argp);
657 	} else if (uap->flag & NFSSVC_USERSTATS) {
658 		error = nfssvc_userstats(p, uap->argp);
659 	} else if (uap->flag & NFSSVC_USERCOUNT) {
660 		error = nfssvc_usercount(p, uap->argp);
661 	} else if (uap->flag & NFSSVC_ZEROSTATS) {
662 		error = nfssvc_zerostats();
663 	} else if (uap->flag & NFSSVC_SRVSTATS) {
664 		error = nfssvc_srvstats(p, uap->argp);
665 	} else {
666 		error = EINVAL;
667 	}
	/* An interrupted/restarted nfsd is reported to userland as success. */
668 	if (error == EINTR || error == ERESTART) {
669 		error = 0;
670 	}
671 	return error;
672 }
673 
674 /*
675  * Adds a socket to the list for servicing by nfsds.
676  */
677 int
nfssvc_addsock(socket_t so,mbuf_t mynam)678 nfssvc_addsock(socket_t so, mbuf_t mynam)
679 {
680 	struct nfsrv_sock *slp;
681 	int error = 0, sodomain, sotype, soprotocol, on = 1;
682 	int first;
683 	struct timeval timeo;
684 	uint64_t sbmaxsize, sobufsize;
685 
686 	/* make sure mbuf constants are set up */
687 	if (!nfs_mbuf_mhlen) {
688 		nfs_mbuf_init();
689 	}
690 
691 	sock_gettype(so, &sodomain, &sotype, &soprotocol);
692 
693 	/* There should be only one UDP socket for each of IPv4 and IPv6 */
694 	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
695 		mbuf_freem(mynam);
696 		return EEXIST;
697 	}
698 	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
699 		mbuf_freem(mynam);
700 		return EEXIST;
701 	}
702 
703 	/* Set protocol options and reserve some space (for UDP). */
704 	if (sotype == SOCK_STREAM) {
705 		error = nfsrv_check_exports_allow_address(mynam);
706 		if (error) {
707 			log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
708 			mbuf_freem(mynam);
709 			return error;
710 		}
711 		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
712 	}
713 	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
714 		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
715 	}
716 
717 	/* Calculate maximum supported socket buffers sizes */
718 	sbmaxsize = (uint64_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
719 
720 	/* Set socket buffer sizes for UDP/TCP */
721 	sobufsize = MIN(sbmaxsize, (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : NFSRV_TCPSOCKBUF);
722 	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sobufsize, sizeof(sobufsize));
723 	if (error) {
724 		log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_SNDBUF to %llu error(s) %d\n", sobufsize, error);
725 	}
726 
727 	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &sobufsize, sizeof(sobufsize));
728 	if (error) {
729 		log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_RCVBUF to %llu error(s) %d\n", sobufsize, error);
730 	}
731 	sock_nointerrupt(so, 0);
732 
733 	/*
734 	 * Set socket send/receive timeouts.
735 	 * Receive timeout shouldn't matter, but setting the send timeout
736 	 * will make sure that an unresponsive client can't hang the server.
737 	 */
738 	timeo.tv_usec = 0;
739 	timeo.tv_sec = 1;
740 	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
741 	if (error) {
742 		log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_RCVTIMEO error(s) %d\n", error);
743 	}
744 
745 	timeo.tv_sec = 30;
746 	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
747 	if (error) {
748 		log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_SNDTIMEO error(s) %d\n", error);
749 	}
750 
751 	slp = kalloc_type(struct nfsrv_sock, Z_WAITOK | Z_ZERO | Z_NOFAIL);
752 	lck_rw_init(&slp->ns_rwlock, &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
753 	lck_mtx_init(&slp->ns_wgmutex, &nfsrv_slp_mutex_group, LCK_ATTR_NULL);
754 
755 	lck_mtx_lock(&nfsd_mutex);
756 
757 	if (soprotocol == IPPROTO_UDP) {
758 		if (sodomain == AF_INET) {
759 			/* There should be only one UDP/IPv4 socket */
760 			if (nfsrv_udpsock) {
761 				lck_mtx_unlock(&nfsd_mutex);
762 				nfsrv_slpfree(slp);
763 				mbuf_freem(mynam);
764 				return EEXIST;
765 			}
766 			nfsrv_udpsock = slp;
767 		}
768 		if (sodomain == AF_INET6) {
769 			/* There should be only one UDP/IPv6 socket */
770 			if (nfsrv_udp6sock) {
771 				lck_mtx_unlock(&nfsd_mutex);
772 				nfsrv_slpfree(slp);
773 				mbuf_freem(mynam);
774 				return EEXIST;
775 			}
776 			nfsrv_udp6sock = slp;
777 		}
778 	}
779 
780 	/* add the socket to the list */
781 	first = TAILQ_EMPTY(&nfsrv_socklist);
782 	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
783 	if (sotype == SOCK_STREAM) {
784 		nfsrv_sock_tcp_cnt++;
785 		if (nfsrv_sock_idle_timeout < 0) {
786 			nfsrv_sock_idle_timeout = 0;
787 		}
788 		if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
789 			nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
790 		}
791 		/*
792 		 * Possibly start or stop the idle timer. We only start the idle timer when
793 		 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
794 		 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
795 		 * the number of connections.
796 		 */
797 		if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
798 			if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
799 				if (nfsrv_idlesock_timer_on) {
800 					thread_call_cancel(nfsrv_idlesock_timer_call);
801 					nfsrv_idlesock_timer_on = 0;
802 				}
803 			} else {
804 				struct nfsrv_sock *old_slp;
805 				struct timeval now;
806 				microuptime(&now);
807 				time_t time_to_wait = nfsrv_sock_idle_timeout;
808 				/*
809 				 * Get the oldest tcp socket and calculate the
810 				 * earliest time for the next idle timer to fire
811 				 * based on the possibly updated nfsrv_sock_idle_timeout
812 				 */
813 				TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
814 					if (old_slp->ns_sotype == SOCK_STREAM) {
815 						time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
816 						if (time_to_wait < 1) {
817 							time_to_wait = 1;
818 						}
819 						break;
820 					}
821 				}
822 				/*
823 				 * If we have a timer scheduled, but if its going to fire too late,
824 				 * turn it off.
825 				 */
826 				if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
827 					thread_call_cancel(nfsrv_idlesock_timer_call);
828 					nfsrv_idlesock_timer_on = 0;
829 				}
830 				/* Schedule the idle thread if it isn't already */
831 				if (!nfsrv_idlesock_timer_on) {
832 					nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
833 					nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
834 				}
835 			}
836 		}
837 	}
838 
839 	sock_retain(so); /* grab a retain count on the socket */
840 	slp->ns_so = so;
841 	slp->ns_sotype = sotype;
842 	slp->ns_nam = mynam;
843 
844 	/* set up the socket up-call */
845 	nfsrv_uc_addsock(slp, first);
846 
847 	/* mark that the socket is not in the nfsrv_sockwg list */
848 	slp->ns_wgq.tqe_next = SLPNOLIST;
849 
850 	slp->ns_flag = SLP_VALID | SLP_NEEDQ;
851 
852 	nfsrv_wakenfsd(slp);
853 	lck_mtx_unlock(&nfsd_mutex);
854 
855 	return 0;
856 }
857 
858 /*
859  * nfssvc_nfsd()
860  *
861  * nfsd theory of operation:
862  *
863  * The first nfsd thread stays in user mode accepting new TCP connections
864  * which are then added via the "addsock" call.  The rest of the nfsd threads
865  * simply call into the kernel and remain there in a loop handling NFS
866  * requests until killed by a signal.
867  *
868  * There's a list of nfsd threads (nfsd_head).
869  * There's an nfsd queue that contains only those nfsds that are
870  *   waiting for work to do (nfsd_queue).
871  *
872  * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
873  *   managing the work on the sockets:
874  *   nfsrv_sockwait - sockets w/new data waiting to be worked on
875  *   nfsrv_sockwork - sockets being worked on which may have more work to do
876  *   nfsrv_sockwg -- sockets which have pending write gather data
877  * When a socket receives data, if it is not currently queued, it
878  *   will be placed at the end of the "wait" queue.
879  * Whenever a socket needs servicing we make sure it is queued and
880  *   wake up a waiting nfsd (if there is one).
881  *
882  * nfsds will service at most 8 requests from the same socket before
883  *   defecting to work on another socket.
884  * nfsds will defect immediately if there are any sockets in the "wait" queue
885  * nfsds looking for a socket to work on check the "wait" queue first and
886  *   then check the "work" queue.
887  * When an nfsd starts working on a socket, it removes it from the head of
888  *   the queue it's currently on and moves it to the end of the "work" queue.
889  * When nfsds are checking the queues for work, any sockets found not to
890  *   have any work are simply dropped from the queue.
891  *
892  */
/*
 * nfssvc_nfsd:
 *   Main loop for an nfsd server thread.  Each thread repeatedly finds a
 *   socket with work to do (from the "wait" queue first, then the "work"
 *   queue), dequeues a request from it, services the request, and sends
 *   the reply.  Runs until the server is shut down (nfsd_thread_max <= 0),
 *   the thread is signalled, or there are more threads than configured and
 *   this one is idle.
 *
 * Locking (in order of acquisition): nfsd_mutex protects the nfsd and
 * socket queues; each socket's ns_rwlock protects per-socket state; the
 * export rwlock is held shared while a request is being serviced.
 *
 * Returns 0 on normal exit, or the error that terminated the loop
 * (e.g. EINTR on shutdown/signal).
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep = NULL;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	time_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	/* Register this thread: first nfsd in also initializes the request cache. */
	nfsd = kalloc_type(struct nfsd, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_mtx_lock(&nfsd_mutex);
	if (nfsd_thread_count++ == 0) {
		nfsrv_initcache();              /* Init the server request cache */
	}
	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(&nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(&nfsd_mutex);
			/* Sleep until a socket shows up with work (5s poll, see `to`). */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, &nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					/* EWOULDBLOCK is just the poll timeout expiring; keep waiting. */
					if (error == EWOULDBLOCK) {
						continue;
					}
					goto done;
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				microuptime(&now);
				slp->ns_timestamp = now.tv_sec;
				/* We keep the socket list in least recently used order for reaping idle sockets */
				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(&nfsd_mutex);
			if (!slp) {
				continue;
			}
			/* Pull a request (or pending write-gather work) off the socket. */
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN) {
					nfsrv_zapsock(slp);
				}
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) {  // RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM) {
						nfsrv_zapsock(slp); // drop connection
					}
				}
				writes_todo = 0;
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					/* no new request, but gathered writes may be due */
					microuptime(&now);
					cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			/* No request (or dead socket): clean up and drop the socket. */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp) {
				nfsrv_slpderef(slp);
			}
			if (nfsd_thread_max <= 0) {
				break;
			}
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			if (nd->nd_nam2) {
				nd->nd_nam = nd->nd_nam2;
			} else {
				nd->nd_nam = slp->ns_nam;
			}

			/* Check the duplicate-request cache; may supply a cached reply. */
			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET) {
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				} else if (saddr->sa_family == AF_INET6) {
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				}
				/* Non-reserved source port: reply AUTH_TOOWEAK (NULL proc exempt). */
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}
		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				if (nd && (nd->nd_vers == NFS_VER3)) {
					procrastinate = nfsrv_wg_delay_v3;
				} else {
					procrastinate = nfsrv_wg_delay;
				}
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				/* WRITEs may be deferred for gathering; everything else dispatches directly. */
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				} else {
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				}
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n",
						    nd->nd_procnum, error);
					}
					break;
				}
				if (error) {
					OSAddAtomic64(1, &nfsrvstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsrvstats.srvrpccntv3[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				OS_FALLTHROUGH;

			case RC_REPLY:
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n", siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error) {
					panic("nfsd setrcvif failed: %d", error);
				}
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error) {
						/* high bit marks this as the last fragment */
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
					}
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				if (error == EINTR || error == ERESTART) {
					/* interrupted: tear down this request and exit the thread */
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr)) {
						kauth_cred_unref(&nd->nd_cr);
					}
					if (nd->nd_gss_context) {
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					}
					NFS_ZFREE(nfsrv_descript_zone, nd);
					nfsrv_slpderef(slp);
					lck_mtx_lock(&nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* duplicate request still in progress: silently drop it */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			}
			;
			opcnt++;
			/* done with this request descriptor */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/* Serve at most 8 requests per socket before giving others a turn. */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) {  // RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM) {
					nfsrv_zapsock(slp); // drop connection
				}
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(&nfsd_mutex);
done:
	/* Unregister this thread; last one out tears down server state. */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	kfree_type(struct nfsd, nfsd);
	if (--nfsd_thread_count == 0) {
		nfsrv_cleanup();
	}
	lck_mtx_unlock(&nfsd_mutex);
	return error;
}
1282 
1283 int
nfssvc_export(user_addr_t argp)1284 nfssvc_export(user_addr_t argp)
1285 {
1286 	int error = 0, is_64bit;
1287 	struct user_nfs_export_args unxa;
1288 	vfs_context_t ctx = vfs_context_current();
1289 
1290 	is_64bit = vfs_context_is64bit(ctx);
1291 
1292 	/* copy in pointers to path and export args */
1293 	if (is_64bit) {
1294 		error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1295 	} else {
1296 		struct nfs_export_args tnxa;
1297 		error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1298 		if (error == 0) {
1299 			/* munge into LP64 version of nfs_export_args structure */
1300 			unxa.nxa_fsid = tnxa.nxa_fsid;
1301 			unxa.nxa_expid = tnxa.nxa_expid;
1302 			unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1303 			unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1304 			unxa.nxa_flags = tnxa.nxa_flags;
1305 			unxa.nxa_netcount = tnxa.nxa_netcount;
1306 			unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1307 		}
1308 	}
1309 	if (error) {
1310 		return error;
1311 	}
1312 
1313 	error = nfsrv_export(&unxa, ctx);
1314 
1315 	return error;
1316 }
1317 
1318 int
nfssvc_exportstats(proc_t p,user_addr_t argp)1319 nfssvc_exportstats(proc_t p, user_addr_t argp)
1320 {
1321 	int error = 0;
1322 	uint pos;
1323 	struct nfs_exportfs *nxfs;
1324 	struct nfs_export *nx;
1325 	struct nfs_export_stat_desc stat_desc = {};
1326 	struct nfs_export_stat_rec statrec;
1327 	uint numExports, totlen, count;
1328 	size_t numRecs;
1329 	user_addr_t oldp, newlenp;
1330 	user_size_t oldlen, newlen;
1331 	struct user_iovec iov[2];
1332 
1333 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1334 	if (error) {
1335 		return error;
1336 	}
1337 
1338 	oldp = iov[0].iov_base;
1339 	oldlen = iov[0].iov_len;
1340 	newlenp = iov[1].iov_base;
1341 	newlen = iov[1].iov_len;
1342 
1343 	/* setup export stat descriptor */
1344 	stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
1345 
1346 	if (!nfsrv_is_initialized()) {
1347 		stat_desc.rec_count = 0;
1348 		if (oldp && (oldlen >= sizeof(struct nfs_export_stat_desc))) {
1349 			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1350 		}
1351 		size_t stat_desc_size = sizeof(struct nfs_export_stat_desc);
1352 		if (!error && newlenp && newlen >= sizeof(stat_desc_size)) {
1353 			error = copyout(&stat_desc_size, newlenp, sizeof(stat_desc_size));
1354 		}
1355 		return error;
1356 	}
1357 
1358 	/* Count the number of exported directories */
1359 	lck_rw_lock_shared(&nfsrv_export_rwlock);
1360 	numExports = 0;
1361 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
1362 	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
1363 	numExports += 1;
1364 
1365 	/* update stat descriptor's export record count */
1366 	stat_desc.rec_count = numExports;
1367 
1368 	/* calculate total size of required buffer */
1369 	totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
1370 
1371 	/* Check caller's buffer */
1372 	if (oldp == 0 || newlenp == 0) {
1373 		lck_rw_done(&nfsrv_export_rwlock);
1374 		/* indicate required buffer len */
1375 		if (newlenp && newlen >= sizeof(totlen)) {
1376 			error = copyout(&totlen, newlenp, sizeof(totlen));
1377 		}
1378 		return error;
1379 	}
1380 
1381 	/* We require the caller's buffer to be at least large enough to hold the descriptor */
1382 	if (oldlen < sizeof(struct nfs_export_stat_desc) || newlen < sizeof(totlen)) {
1383 		lck_rw_done(&nfsrv_export_rwlock);
1384 		/* indicate required buffer len */
1385 		if (newlenp && newlen >= sizeof(totlen)) {
1386 			(void)copyout(&totlen, newlenp, sizeof(totlen));
1387 		}
1388 		return ENOMEM;
1389 	}
1390 
1391 	/* indicate required buffer len */
1392 	error = copyout(&totlen, newlenp, sizeof(totlen));
1393 	if (error) {
1394 		lck_rw_done(&nfsrv_export_rwlock);
1395 		return error;
1396 	}
1397 
1398 	/* check if export table is empty */
1399 	if (!numExports) {
1400 		lck_rw_done(&nfsrv_export_rwlock);
1401 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1402 		return error;
1403 	}
1404 
1405 	/* calculate how many actual export stat records fit into caller's buffer */
1406 	numRecs = (totlen - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
1407 
1408 	if (!numRecs) {
1409 		/* caller's buffer can only accomodate descriptor */
1410 		lck_rw_done(&nfsrv_export_rwlock);
1411 		stat_desc.rec_count = 0;
1412 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1413 		return error;
1414 	}
1415 
1416 	/* adjust to actual number of records to copyout to caller's buffer */
1417 	if (numRecs > numExports) {
1418 		numRecs = numExports;
1419 	}
1420 
1421 	/* set actual number of records we are returning */
1422 	stat_desc.rec_count = numRecs;
1423 
1424 	/* first copy out the stat descriptor */
1425 	pos = 0;
1426 	error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
1427 	if (error) {
1428 		lck_rw_done(&nfsrv_export_rwlock);
1429 		return error;
1430 	}
1431 	pos += sizeof(struct nfs_export_stat_desc);
1432 
1433 	/* Loop through exported directories */
1434 	count = 0;
1435 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
1436 		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
1437 			if (count >= numRecs) {
1438 				break;
1439 			}
1440 
1441 			/* build exported filesystem path */
1442 			memset(statrec.path, 0, sizeof(statrec.path));
1443 			snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
1444 			    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
1445 			    nx->nx_path);
1446 
1447 			/* build the 64-bit export stat counters */
1448 			statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
1449 			    nx->nx_stats.ops.lo;
1450 			statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
1451 			    nx->nx_stats.bytes_read.lo;
1452 			statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
1453 			    nx->nx_stats.bytes_written.lo;
1454 			error = copyout(&statrec, oldp + pos, sizeof(statrec));
1455 			if (error) {
1456 				lck_rw_done(&nfsrv_export_rwlock);
1457 				return error;
1458 			}
1459 			/* advance buffer position */
1460 			pos += sizeof(statrec);
1461 		}
1462 	}
1463 	lck_rw_done(&nfsrv_export_rwlock);
1464 
1465 	return error;
1466 }
1467 
/*
 * Return per-user NFS statistics via a sysctl-style two-iovec interface:
 *   iov[0] (oldp/oldlen):   caller's buffer, receives a
 *                           nfs_user_stat_desc header followed by a
 *                           path record for each export, each followed
 *                           by the user records for that export
 *   iov[1] (newlenp/newlen): receives the total buffer size required to
 *                           hold ALL records
 *
 * Records are copied out only while space remains in the caller's
 * buffer, but bytes_total keeps accumulating so the caller learns the
 * full size required.  Returns 0 on success, ENOMEM if the buffer can't
 * even hold the descriptor, or a copyin/copyout error.
 */
int
nfssvc_userstats(proc_t p, user_addr_t argp)
{
	int error = 0;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;
	struct nfs_active_user_list *ulist;
	struct nfs_user_stat_desc ustat_desc = {};
	struct nfs_user_stat_node *unode, *unode_next;
	struct nfs_user_stat_user_rec ustat_rec;
	struct nfs_user_stat_path_rec upath_rec;
	uint bytes_total, recs_copied, pos;
	size_t bytes_avail;
	user_addr_t oldp, newlenp;
	user_size_t oldlen, newlen;
	struct user_iovec iov[2];

	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
	if (error) {
		return error;
	}

	oldp = iov[0].iov_base;
	oldlen = iov[0].iov_len;
	newlenp = iov[1].iov_base;
	newlen = iov[1].iov_len;

	/* init structures used for copying out of kernel */
	ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
	ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
	upath_rec.rec_type = NFS_USER_STAT_PATH_REC;

	/* initialize counters */
	bytes_total = sizeof(struct nfs_user_stat_desc);
	bytes_avail  = oldlen;
	recs_copied = 0;

	if (!nfsrv_is_initialized()) { /* NFS server not initialized, so no stats */
		goto ustat_skip;
	}

	/* reclaim old expired user nodes */
	nfsrv_active_user_list_reclaim();

	/* reserve space for the buffer descriptor */
	if (bytes_avail >= sizeof(struct nfs_user_stat_desc)) {
		bytes_avail -= sizeof(struct nfs_user_stat_desc);
	} else {
		bytes_avail = 0;
	}

	/* put buffer position past the buffer descriptor */
	pos = sizeof(struct nfs_user_stat_desc);

	/* Loop through exported directories */
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
			/* copy out path */
			if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
				memset(upath_rec.path, 0, sizeof(upath_rec.path));
				snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
				    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
				    nx->nx_path);

				error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
				if (error) {
					/* punt */
					goto ustat_done;
				}

				pos += sizeof(struct nfs_user_stat_path_rec);
				bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
				recs_copied++;
			} else {
				/* Caller's buffer is exhausted */
				bytes_avail = 0;
			}

			/* still count the space this record would need */
			bytes_total += sizeof(struct nfs_user_stat_path_rec);

			/* Scan through all user nodes of this export */
			ulist = &nx->nx_user_list;
			lck_mtx_lock(&ulist->user_mutex);
			for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
				unode_next = TAILQ_NEXT(unode, lru_link);

				/* copy out node if there is space */
				if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
					/* prepare a user stat rec for copying out */
					ustat_rec.uid = unode->uid;
					memset(&ustat_rec.sock, 0, sizeof(ustat_rec.sock));
					/* NOTE(review): assumes unode->sock.ss_len never exceeds
					 * sizeof(ustat_rec.sock) — appears to rely on the producer
					 * storing a valid sockaddr; confirm at the fill site. */
					bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
					ustat_rec.ops = unode->ops;
					ustat_rec.bytes_read = unode->bytes_read;
					ustat_rec.bytes_written = unode->bytes_written;
					ustat_rec.tm_start = unode->tm_start;
					ustat_rec.tm_last = unode->tm_last;

					error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));

					if (error) {
						/* punt */
						lck_mtx_unlock(&ulist->user_mutex);
						goto ustat_done;
					}

					pos += sizeof(struct nfs_user_stat_user_rec);
					bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
					recs_copied++;
				} else {
					/* Caller's buffer is exhausted */
					bytes_avail = 0;
				}
				bytes_total += sizeof(struct nfs_user_stat_user_rec);
			}
			/* can unlock this export's list now */
			lck_mtx_unlock(&ulist->user_mutex);
		}
	}

ustat_done:
	/* unlock the export table */
	lck_rw_done(&nfsrv_export_rwlock);

ustat_skip:
	/* indicate number of actual records copied */
	ustat_desc.rec_count = recs_copied;

	if (!error) {
		/* check if there was enough room for the buffer descriptor */
		if (oldlen >= sizeof(struct nfs_user_stat_desc)) {
			error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
		} else {
			error = ENOMEM;
		}

		/* always indicate required buffer size */
		if (!error && newlenp && newlen >= sizeof(bytes_total)) {
			error = copyout(&bytes_total, newlenp, sizeof(bytes_total));
		}
	}
	return error;
}
1612 
1613 int
nfssvc_usercount(proc_t p,user_addr_t argp)1614 nfssvc_usercount(proc_t p, user_addr_t argp)
1615 {
1616 	int error;
1617 	user_addr_t oldp, newlenp;
1618 	user_size_t oldlen, newlen;
1619 	struct user_iovec iov[2];
1620 	size_t stat_size = sizeof(nfsrv_user_stat_node_count);
1621 
1622 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1623 	if (error) {
1624 		return error;
1625 	}
1626 
1627 	oldp = iov[0].iov_base;
1628 	oldlen = iov[0].iov_len;
1629 	newlenp = iov[1].iov_base;
1630 	newlen = iov[1].iov_len;
1631 
1632 	if (!oldp) {
1633 		if (newlenp && newlen >= sizeof(stat_size)) {
1634 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1635 		}
1636 		return error;
1637 	}
1638 
1639 	if (oldlen < stat_size) {
1640 		if (newlenp && newlen >= sizeof(stat_size)) {
1641 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1642 		}
1643 		return ENOMEM;
1644 	}
1645 
1646 	if (nfsrv_is_initialized()) {
1647 		/* reclaim old expired user nodes */
1648 		nfsrv_active_user_list_reclaim();
1649 	}
1650 
1651 	error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
1652 
1653 	return error;
1654 }
1655 
1656 int
nfssvc_zerostats(void)1657 nfssvc_zerostats(void)
1658 {
1659 	bzero(&nfsrvstats, sizeof nfsrvstats);
1660 	return 0;
1661 }
1662 
1663 int
nfssvc_srvstats(proc_t p,user_addr_t argp)1664 nfssvc_srvstats(proc_t p, user_addr_t argp)
1665 {
1666 	int error;
1667 	user_addr_t oldp, newlenp;
1668 	user_size_t oldlen, newlen;
1669 	struct user_iovec iov[2];
1670 	size_t stat_size = sizeof(nfsrvstats);
1671 
1672 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1673 	if (error) {
1674 		return error;
1675 	}
1676 
1677 	oldp = iov[0].iov_base;
1678 	oldlen = iov[0].iov_len;
1679 	newlenp = iov[1].iov_base;
1680 	newlen = iov[1].iov_len;
1681 
1682 	if (!oldp) {
1683 		if (newlenp && newlen >= sizeof(stat_size)) {
1684 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1685 		}
1686 		return error;
1687 	}
1688 
1689 	if (oldlen < stat_size) {
1690 		if (newlenp && newlen >= sizeof(stat_size)) {
1691 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1692 		}
1693 		return ENOMEM;
1694 	}
1695 
1696 	error = copyout(&nfsrvstats, oldp, stat_size);
1697 	if (error) {
1698 		return error;
1699 	}
1700 
1701 	return 0;
1702 }
1703 
1704 /*
1705  * Shut down a socket associated with an nfsrv_sock structure.
1706  * Should be called with the send lock set, if required.
1707  * The trick here is to increment the sref at the start, so that the nfsds
1708  * will stop using it and clear ns_flag at the end so that it will not be
1709  * reassigned during cleanup.
1710  */
1711 void
nfsrv_zapsock(struct nfsrv_sock * slp)1712 nfsrv_zapsock(struct nfsrv_sock *slp)
1713 {
1714 	socket_t so;
1715 
1716 	if ((slp->ns_flag & SLP_VALID) == 0) {
1717 		return;
1718 	}
1719 	slp->ns_flag &= ~SLP_ALLFLAGS;
1720 
1721 	so = slp->ns_so;
1722 	if (so == NULL) {
1723 		return;
1724 	}
1725 
1726 	sock_setupcall(so, NULL, NULL);
1727 	sock_shutdown(so, SHUT_RDWR);
1728 
1729 	/*
1730 	 * Remove from the up-call queue
1731 	 */
1732 	nfsrv_uc_dequeue(slp);
1733 }
1734 
1735 /*
1736  * cleanup and release a server socket structure.
1737  */
1738 void
nfsrv_slpfree(struct nfsrv_sock * slp)1739 nfsrv_slpfree(struct nfsrv_sock *slp)
1740 {
1741 	struct nfsrv_descript *nwp, *nnwp;
1742 
1743 	if (slp->ns_so) {
1744 		sock_release(slp->ns_so);
1745 		slp->ns_so = NULL;
1746 	}
1747 	if (slp->ns_nam) {
1748 		mbuf_free(slp->ns_nam);
1749 	}
1750 	if (slp->ns_raw) {
1751 		mbuf_freem(slp->ns_raw);
1752 	}
1753 	if (slp->ns_rec) {
1754 		mbuf_freem(slp->ns_rec);
1755 	}
1756 	if (slp->ns_frag) {
1757 		mbuf_freem(slp->ns_frag);
1758 	}
1759 	slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1760 	slp->ns_reccnt = 0;
1761 
1762 	for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1763 		nnwp = nwp->nd_tq.le_next;
1764 		LIST_REMOVE(nwp, nd_tq);
1765 		nfsm_chain_cleanup(&nwp->nd_nmreq);
1766 		if (nwp->nd_mrep) {
1767 			mbuf_freem(nwp->nd_mrep);
1768 		}
1769 		if (nwp->nd_nam2) {
1770 			mbuf_freem(nwp->nd_nam2);
1771 		}
1772 		if (IS_VALID_CRED(nwp->nd_cr)) {
1773 			kauth_cred_unref(&nwp->nd_cr);
1774 		}
1775 		if (nwp->nd_gss_context) {
1776 			nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
1777 		}
1778 		NFS_ZFREE(nfsrv_descript_zone, nwp);
1779 	}
1780 	LIST_INIT(&slp->ns_tq);
1781 
1782 	lck_rw_destroy(&slp->ns_rwlock, &nfsrv_slp_rwlock_group);
1783 	lck_mtx_destroy(&slp->ns_wgmutex, &nfsrv_slp_mutex_group);
1784 	kfree_type(struct nfsrv_sock, slp);
1785 }
1786 
/*
 * Dereference a server socket structure. If it has no more references and
 * is no longer valid, you can throw it away.
 */
/*
 * Drop one reference on a server socket.  Caller must hold nfsd_mutex
 * (it protects the wait/work/socket lists manipulated below).
 *
 * If the socket is still referenced or still valid, we only make sure it
 * isn't left on a service queue with no pending work.  Otherwise it is
 * unhooked from every list and freed.
 */
static void
nfsrv_slpderef_locked(struct nfsrv_sock *slp)
{
	lck_rw_lock_exclusive(&slp->ns_rwlock);
	slp->ns_sref--;

	if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
		if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
			/* remove socket from queue since there's no work */
			if (slp->ns_flag & SLP_WAITQ) {
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			} else {
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			}
			slp->ns_flag &= ~SLP_QUEUED;
		}
		lck_rw_done(&slp->ns_rwlock);
		return;
	}

	/* This socket is no longer valid, so we'll get rid of it */

	/* Unhook from whichever service queue it is on. */
	if (slp->ns_flag & SLP_QUEUED) {
		if (slp->ns_flag & SLP_WAITQ) {
			TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
		} else {
			TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
		}
		slp->ns_flag &= ~SLP_QUEUED;
	}
	lck_rw_done(&slp->ns_rwlock);

	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
	if (slp->ns_sotype == SOCK_STREAM) {
		nfsrv_sock_tcp_cnt--;
	}

	/* now remove from the write gather socket list */
	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
		slp->ns_wgq.tqe_next = SLPNOLIST;
	}
	nfsrv_slpfree(slp);
}
1835 
/*
 * Drop a reference on a server socket, taking nfsd_mutex around the
 * locked variant (the mutex protects the socket/service lists it may
 * modify).
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	lck_mtx_lock(&nfsd_mutex);
	nfsrv_slpderef_locked(slp);
	lck_mtx_unlock(&nfsd_mutex);
}
1843 
/*
 * Check periodically for idle sockets if needed and
 * zap them.
 */
/*
 * Timer callback that reaps idle TCP server sockets.
 *
 * Runs only while the number of TCP sockets exceeds twice the nfsd
 * thread count.  Walks the socket list (kept in least-recently-used
 * order by nfssvc_nfsd) zapping sockets idle longer than
 * nfsrv_sock_idle_timeout, then re-arms itself for the earliest future
 * expiry.
 */
void
nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
{
	struct nfsrv_sock *slp, *tslp;
	struct timeval now;
	time_t time_to_wait = nfsrv_sock_idle_timeout;

	microuptime(&now);
	lck_mtx_lock(&nfsd_mutex);

	/* Turn off the timer if we're supposed to and get out */
	if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
		nfsrv_sock_idle_timeout = 0;
	}
	if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
		nfsrv_idlesock_timer_on = 0;
		lck_mtx_unlock(&nfsd_mutex);
		return;
	}

	TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
		lck_rw_lock_exclusive(&slp->ns_rwlock);
		/* Skip udp and referenced sockets */
		if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
			lck_rw_done(&slp->ns_rwlock);
			continue;
		}
		/*
		 * If this is the first non-referenced socket that hasn't idled out,
		 * use its time stamp to calculate the earliest time in the future
		 * to start the next invocation of the timer. Since the nfsrv_socklist
		 * is sorted oldest access to newest. Once we find the first one,
		 * we're done and break out of the loop.
		 */
		if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
		    nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
			time_to_wait -= now.tv_sec - slp->ns_timestamp;
			if (time_to_wait < 1) {
				time_to_wait = 1;
			}
			lck_rw_done(&slp->ns_rwlock);
			break;
		}
		/*
		 * Bump the ref count. nfsrv_slpderef below will destroy
		 * the socket, since nfsrv_zapsock has closed it.
		 */
		slp->ns_sref++;
		nfsrv_zapsock(slp);
		lck_rw_done(&slp->ns_rwlock);
		nfsrv_slpderef_locked(slp);
	}

	/* Start ourself back up */
	nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
	/* Remember when the next timer will fire for nfssvc_addsock. */
	nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
	lck_mtx_unlock(&nfsd_mutex);
}
1907 
1908 /*
1909  * Clean up the data structures for the server.
1910  */
1911 void
nfsrv_cleanup(void)1912 nfsrv_cleanup(void)
1913 {
1914 	struct nfsrv_sock *slp, *nslp;
1915 	struct timeval now;
1916 #if CONFIG_FSE
1917 	struct nfsrv_fmod *fp, *nfp;
1918 	int i;
1919 #endif
1920 
1921 	microuptime(&now);
1922 	for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1923 		nslp = TAILQ_NEXT(slp, ns_chain);
1924 		lck_rw_lock_exclusive(&slp->ns_rwlock);
1925 		slp->ns_sref++;
1926 		if (slp->ns_flag & SLP_VALID) {
1927 			nfsrv_zapsock(slp);
1928 		}
1929 		lck_rw_done(&slp->ns_rwlock);
1930 		nfsrv_slpderef_locked(slp);
1931 	}
1932 #
1933 #if CONFIG_FSE
1934 	/*
1935 	 * Flush pending file write fsevents
1936 	 */
1937 	lck_mtx_lock(&nfsrv_fmod_mutex);
1938 	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1939 		for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1940 			/*
1941 			 * Fire off the content modified fsevent for each
1942 			 * entry, remove it from the list, and free it.
1943 			 */
1944 			if (nfsrv_fsevents_enabled) {
1945 				fp->fm_context.vc_thread = current_thread();
1946 				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1947 				    FSE_ARG_VNODE, fp->fm_vp,
1948 				    FSE_ARG_DONE);
1949 			}
1950 			vnode_put(fp->fm_vp);
1951 			kauth_cred_unref(&fp->fm_context.vc_ucred);
1952 			nfp = LIST_NEXT(fp, fm_link);
1953 			LIST_REMOVE(fp, fm_link);
1954 			kfree_type(struct nfsrv_fmod, fp);
1955 		}
1956 	}
1957 	nfsrv_fmod_pending = 0;
1958 	lck_mtx_unlock(&nfsrv_fmod_mutex);
1959 #endif
1960 
1961 	nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
1962 
1963 	nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
1964 
1965 	nfsrv_cleancache();     /* And clear out server cache */
1966 
1967 	nfsrv_udpsock = NULL;
1968 	nfsrv_udp6sock = NULL;
1969 }
1970 
1971 #endif /* CONFIG_NFS_SERVER */
1972