xref: /xnu-11215.1.10/bsd/nfs/nfs_syscalls.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1989, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Rick Macklem at The University of Guelph.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
65  * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66  */
67 
68 #include <nfs/nfs_conf.h>
69 
70 /*
71  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72  * support for mandatory and extensible security protections.  This notice
73  * is included in support of clause 2.2 (b) of the Apple Public License,
74  * Version 2.0.
75  */
76 
77 #include <sys/file_internal.h>
78 #include <sys/vnode_internal.h>
79 #include <sys/uio_internal.h>
80 #include <sys/sysctl.h>
81 #include <sys/socketvar.h>
82 #include <sys/sysproto.h>
83 #include <sys/fsevents.h>
84 #include <kern/task.h>
85 
86 #include <security/audit/audit.h>
87 
88 #include <netinet/in.h>
89 #include <netinet/tcp.h>
90 #include <nfs/xdr_subs.h>
91 #include <nfs/rpcv2.h>
92 #include <nfs/nfsproto.h>
93 #include <nfs/nfs.h>
94 #include <nfs/nfsm_subs.h>
95 #include <nfs/nfsrvcache.h>
96 #include <nfs/nfs_gss.h>
97 #if CONFIG_MACF
98 #include <security/mac_framework.h>
99 #endif
100 
#if CONFIG_NFS_SERVER

/* Table of NFS server procedure handlers, indexed by RPC procedure number. */
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];

/* Write-gather delays, exported as sysctls below; defined elsewhere. */
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* When non-zero, require clients to connect from a reserved (privileged) port. */
static int nfsrv_require_resv_port = 0;
/* Absolute time (uptime secs) the idle-socket timer is scheduled to fire; 0 = off. */
static time_t  nfsrv_idlesock_timer_on = 0;
/* Number of TCP server sockets currently on nfsrv_socklist. */
static int nfsrv_sock_tcp_cnt = 0;
/* Floor (seconds) enforced on any non-zero idle timeout value. */
#define NFSD_MIN_IDLE_TIMEOUT 30
/* Idle TCP socket timeout in seconds (0 disables idle reaping). */
static int nfsrv_sock_idle_timeout = 3600; /* One hour */

/* Forward declarations for nfssvc() sub-commands and server socket helpers. */
int     nfssvc_export(user_addr_t argp);
int     nfssvc_exportstats(proc_t p, user_addr_t argp);
int     nfssvc_userstats(proc_t p, user_addr_t argp);
int     nfssvc_usercount(proc_t p, user_addr_t argp);
int     nfssvc_zerostats(void);
int     nfssvc_srvstats(proc_t p, user_addr_t argp);
int     nfssvc_nfsd(void);
int     nfssvc_addsock(socket_t, mbuf_t);
void    nfsrv_zapsock(struct nfsrv_sock *);
void    nfsrv_slpderef(struct nfsrv_sock *);
void    nfsrv_slpfree(struct nfsrv_sock *);

#endif /* CONFIG_NFS_SERVER */
127 
128 /*
129  * sysctl stuff
130  */
SYSCTL_DECL(_vfs_generic);
/* vfs.generic.nfs: root node of the NFS sysctl namespace. */
SYSCTL_EXTENSIBLE_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");

#if CONFIG_NFS_SERVER
/* vfs.generic.nfs.server: NFS server tunables and (read-only) counters. */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_debug_ctl, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, unprocessed_rpc_current, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_unprocessed_rpc_current, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, unprocessed_rpc_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_unprocessed_rpc_max, 0, "");
#if CONFIG_FSE
/* Only meaningful when file-system events support is compiled in. */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
#ifdef NFS_UC_Q_DEBUG
/* Debug knobs for the socket up-call proxy queues. */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif
#endif /* CONFIG_NFS_SERVER */
162 
163 /* NFS hooks */
164 
/* NFS hooks variables */
/*
 * Table of function-pointer hooks filled in via nfs_register_hooks()
 * and cleared by nfs_unregister_hooks(); both entries start out NULL
 * (no handler registered).
 */
struct nfs_hooks_in nfsh = {
	.f_vinvalbuf      = NULL,
	.f_buf_page_inval = NULL
};
170 
171 /* NFS hooks registration functions */
172 void
nfs_register_hooks(struct nfs_hooks_in * inh,struct nfs_hooks_out * outh)173 nfs_register_hooks(struct nfs_hooks_in *inh, struct nfs_hooks_out *outh)
174 {
175 	if (inh) {
176 		nfsh.f_vinvalbuf = inh->f_vinvalbuf;
177 		nfsh.f_buf_page_inval = inh->f_buf_page_inval;
178 	}
179 
180 	if (outh) {
181 		outh->f_get_bsdthreadtask_info = get_bsdthreadtask_info;
182 	}
183 }
184 
185 void
nfs_unregister_hooks(void)186 nfs_unregister_hooks(void)
187 {
188 	memset(&nfsh, 0, sizeof(nfsh));
189 }
190 
191 /* NFS hooks wrappers */
192 int
nfs_vinvalbuf(vnode_t vp,int flags,vfs_context_t ctx,int intrflg)193 nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
194 {
195 	if (nfsh.f_vinvalbuf == NULL) {
196 		return 0;
197 	}
198 
199 	return nfsh.f_vinvalbuf(vp, flags, ctx, intrflg);
200 }
201 
202 int
nfs_buf_page_inval(vnode_t vp,off_t offset)203 nfs_buf_page_inval(vnode_t vp, off_t offset)
204 {
205 	if (nfsh.f_buf_page_inval == NULL) {
206 		return 0;
207 	}
208 
209 	return nfsh.f_buf_page_inval(vp, offset);
210 }
211 
/*
 * Parameter decorator for the syscall entry points below: when the NFS
 * server is compiled out the stubs never touch their arguments, so mark
 * them __unused to silence compiler warnings; otherwise expand to nothing.
 */
#if !CONFIG_NFS_SERVER
#define __no_nfs_server_unused      __unused
#else
#define __no_nfs_server_unused      /* nothing */
#endif
217 
218 /*
219  * NFS server system calls
220  * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
221  */
222 
223 #if CONFIG_NFS_SERVER
/*
 * Look up the exported file system whose nxfs_path matches 'ptr'.
 *
 * The scan breaks only on an exact path match (compared up to
 * MAXPATHLEN); LIST_FOREACH leaves nxfs NULL when the list is
 * exhausted.  Returns the matching nfs_exportfs, or NULL if none.
 *
 * Caller must hold nfsrv_export_rwlock (all callers in this file take
 * it shared before calling).
 */
static struct nfs_exportfs *
nfsrv_find_exportfs(const char *ptr)
{
	struct nfs_exportfs *nxfs;

	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
			break;
		}
	}
	/* Defensive re-check: reject an entry that isn't a prefix of 'ptr'. */
	if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
		nxfs = NULL;
	}

	return nxfs;
}
240 
/*
 * Return the portion of 'path' below the export file system root
 * 'nxfs_path', or NULL if 'path' does not lie within it.
 *
 * Fast path: if 'nxfs_path' is a literal prefix of 'path', return the
 * tail directly.  Otherwise 'path' may use the physical (firmlink-free)
 * spelling of the mount point: resolve 'nxfs_path' to its mount's root
 * vnode, obtain the real mount-on name without firmlinks, and retry the
 * prefix match against that.
 *
 * Returns a pointer into 'path' (no allocation), or NULL on lookup
 * failure or mismatch.
 */
static char *
nfsrv_export_remainder(char *path, char *nxfs_path)
{
	int error;
	vnode_t vp, rvp;
	struct nameidata nd;
	size_t pathbuflen = MAXPATHLEN;
	char real_mntonname[MAXPATHLEN];

	/* Fast path: direct prefix match. */
	if (!strncmp(path, nxfs_path, strlen(nxfs_path))) {
		return path + strlen(nxfs_path);
	}

	/* Look up the export path to find its mount. */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs_path), vfs_context_current());
	error = namei(&nd);
	if (error) {
		return NULL;
	}

	nameidone(&nd);
	vp = nd.ni_vp;

	/* Get the root vnode of the mount; drop our lookup reference. */
	error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
	vnode_put(vp);
	if (error) {
		return NULL;
	}

	/* Resolve the physical (no-firmlink) mount-on path. */
	error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen, VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
	vnode_put(rvp);

	if (error || strncmp(path, real_mntonname, strlen(real_mntonname))) {
		return NULL;
	}

	return path + strlen(real_mntonname);
}
279 /*
280  * Get file handle system call
281  */
int
getfh(
	proc_t p __no_nfs_server_unused,
	struct getfh_args *uap __no_nfs_server_unused,
	__unused int *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	int error, fhlen = 0, fidlen;
	struct nameidata nd;
	char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
	size_t pathlen;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;

	/*
	 * Must be super user
	 */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	/* Copy in the pathname and the caller's requested handle size. */
	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
	if (!error) {
		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
	}
	if (error) {
		return error;
	}
	/* limit fh size to length specified (or v3 size by default) */
	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
		fhlen = NFSV3_MAX_FH_SIZE;
	}
	/* Space left for the file id after the export handle header. */
	fidlen = fhlen - sizeof(struct nfs_exphandle);

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	/* Resolve the pathname to a vnode (returned with an iocount). */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
	error = namei(&nd);
	if (error) {
		return error;
	}
	nameidone(&nd);

	vp = nd.ni_vp;

	// find exportfs that matches f_mntonname
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	ptr = vfs_statfs(vnode_mount(vp))->f_mntonname;
	if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
		/*
		 * The f_mntonname might be a firmlink path.  Resolve
		 * it into a physical path and try again.
		 */
		size_t pathbuflen = MAXPATHLEN;
		vnode_t rvp;

		error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
		if (error) {
			goto out;
		}
		error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
		    VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
		vnode_put(rvp);
		if (error) {
			goto out;
		}
		ptr = real_mntonname;
		nxfs = nfsrv_find_exportfs(ptr);
	}
	if (nxfs == NULL) {
		error = EINVAL;
		goto out;
	}
	// find export that best matches remainder of path
	if ((ptr = nfsrv_export_remainder(path, nxfs->nxfs_path)) == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Strip leading slashes off the remainder before matching exports. */
	while (*ptr && (*ptr == '/')) {
		ptr++;
	}
	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
		size_t len = strlen(nx->nx_path);
		if (len == 0) { // we've hit the export entry for the root directory
			break;
		}
		if (!strncmp(nx->nx_path, ptr, len)) {
			break;
		}
	}
	if (!nx) {
		error = EINVAL;
		goto out;
	}

	/* Build the export handle header, then append the file id. */
	bzero(&nfh, sizeof(nfh));
	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
	nfh.nfh_xh.nxh_flags = 0;
	nfh.nfh_xh.nxh_reserved = 0;
	nfh.nfh_len = fidlen;
	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
	/* Reject file ids larger than the space the caller allowed. */
	if (nfh.nfh_len > (uint32_t)fidlen) {
		error = EOVERFLOW;
	}
	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
	nfh.nfh_len += sizeof(nfh.nfh_xh);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

out:
	/* Common exit: release the export lock and the vnode iocount. */
	lck_rw_done(&nfsrv_export_rwlock);
	vnode_put(vp);
	if (error) {
		return error;
	}
	/*
	 * At first blush, this may appear to leak a kernel stack
	 * address, but the copyout() never reaches &nfh.nfh_fhp
	 * (sizeof(fhandle_t) < sizeof(nfh)).
	 */
	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
	return error;
}
412 
413 extern const struct fileops vnops;
414 
415 /*
416  * syscall for the rpc.lockd to use to translate a NFS file handle into
417  * an open descriptor.
418  *
419  * warning: do not remove the suser() call or this becomes one giant
420  * security hole.
421  */
int
fhopen(proc_t p __no_nfs_server_unused,
    struct fhopen_args *uap __no_nfs_server_unused,
    int32_t *retval __no_nfs_server_unused)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct flock lf;
	struct fileproc *fp, *nfp;
	int fmode, error, type;
	int indx;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	/*
	 * Must be super user
	 */
	error = suser(vfs_context_ucred(ctx), 0);
	if (error) {
		return error;
	}

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
		return EINVAL;
	}

	/* Copy in the handle length first, validate it, then the whole handle. */
	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
	if (error) {
		return error;
	}
	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
		return EINVAL;
	}
	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
	if (error) {
		return error;
	}
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

	lck_rw_lock_shared(&nfsrv_export_rwlock);
	/* now give me my vnode, it gets returned to me with a reference */
	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
	lck_rw_done(&nfsrv_export_rwlock);
	if (error) {
		if (error == NFSERR_TRYLATER) {
			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
		}
		return error;
	}

	/*
	 * From now on we have to make sure not
	 * to forget about the vnode.
	 * Any error that causes an abort must vnode_put(vp).
	 * Just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vnode_vtype(vp) == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/* disallow write operations on directories */
	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
		error = EISDIR;
		goto bad;
	}

#if CONFIG_MACF
	/* MAC policy check for the open, when MACF is compiled in. */
	if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
		goto bad;
	}
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		action |= KAUTH_VNODE_WRITE_DATA;
	}
	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
		goto bad;
	}

	/* Open the vnode and take a usecount reference for the open. */
	if ((error = VNOP_OPEN(vp, fmode, ctx))) {
		goto bad;
	}
	if ((error = vnode_ref_ext(vp, fmode, 0))) {
		goto bad;
	}

	/*
	 * end of vn_open code
	 */

	// starting here... error paths should call vn_close/vnode_put
	if ((error = falloc(p, &nfp, &indx)) != 0) {
		vn_close(vp, fmode & FMASK, ctx);
		goto bad;
	}
	fp = nfp;

	/* Wire the vnode into the new file descriptor. */
	fp->fp_glob->fg_flag = fmode & FMASK;
	fp->fp_glob->fg_ops = &vnops;
	fp_set_data(fp, vp);

	// XXX do we really need to support this with fhopen()?
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK) {
			lf.l_type = F_WRLCK;
		} else {
			lf.l_type = F_RDLCK;
		}
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0) {
			type |= F_WAIT;
		}
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
			struct vfs_context context = *vfs_context_current();
			/* Modify local copy (to not damage thread copy) */
			context.vc_ucred = fp->fp_glob->fg_cred;

			/* Lock failed: undo the open and free the descriptor. */
			vn_close(vp, fp->fp_glob->fg_flag, &context);
			fp_free(p, indx, fp);
			goto bad;
		}
		fp->fp_glob->fg_flag |= FWASLOCKED;
	}

	/* Drop the iocount; the open holds its own usecount reference. */
	vnode_put(vp);

	/* Publish the descriptor in the process fd table. */
	proc_fdlock(p);
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;
	return 0;

bad:
	vnode_put(vp);
	return error;
}
582 
583 /*
584  * NFS server pseudo system call
585  */
int
nfssvc(proc_t p __no_nfs_server_unused,
    struct nfssvc_args *uap __no_nfs_server_unused,
    __unused int *retval)
{
	mbuf_t nam;
	struct user_nfsd_args user_nfsdarg;
	socket_t so;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/*
	 * Must be super user for NFSSVC_NFSD and NFSSVC_ADDSOCK operations.
	 */
	if ((uap->flag & (NFSSVC_NFSD | NFSSVC_ADDSOCK)) && ((error = proc_suser(p)))) {
		return error;
	}
#if CONFIG_MACF
	/* MAC policy check for nfsd operations, when MACF is compiled in. */
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error) {
		return error;
	}
#endif

	/* make sure NFS server data structures have been initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		/* Copy in the nfsd args, converting from the 32-bit layout if needed. */
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
		} else {
			struct nfsd_args    tmp_args;
			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
			if (error == 0) {
				user_nfsdarg.sock = tmp_args.sock;
				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
				user_nfsdarg.namelen = tmp_args.namelen;
			}
		}
		if (error) {
			return error;
		}
		/* get the socket */
		error = file_socket(user_nfsdarg.sock, &so);
		if (error) {
			return error;
		}
		/* Get the client address for connected sockets. */
		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
			nam = NULL;
		} else {
			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
			if (error) {
				/* drop the iocount file_socket() grabbed on the file descriptor */
				file_drop(user_nfsdarg.sock);
				return error;
			}
		}
		/*
		 * nfssvc_addsock() will grab a retain count on the socket
		 * to keep the socket from being closed when nfsd closes its
		 * file descriptor for it.
		 */
		error = nfssvc_addsock(so, nam);
		/* drop the iocount file_socket() grabbed on the file descriptor */
		file_drop(user_nfsdarg.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		/* Become an nfsd worker thread (does not return until killed). */
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else if (uap->flag & NFSSVC_EXPORTSTATS) {
		error = nfssvc_exportstats(p, uap->argp);
	} else if (uap->flag & NFSSVC_USERSTATS) {
		error = nfssvc_userstats(p, uap->argp);
	} else if (uap->flag & NFSSVC_USERCOUNT) {
		error = nfssvc_usercount(p, uap->argp);
	} else if (uap->flag & NFSSVC_ZEROSTATS) {
		error = nfssvc_zerostats();
	} else if (uap->flag & NFSSVC_SRVSTATS) {
		error = nfssvc_srvstats(p, uap->argp);
	} else {
		error = EINVAL;
	}
	/* Signal-driven exits from nfsd are normal shutdown, not errors. */
	if (error == EINTR || error == ERESTART) {
		error = 0;
	}
	return error;
}
675 
676 /*
677  * Adds a socket to the list for servicing by nfsds.
678  */
int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
	struct nfsrv_sock *slp;
	int error = 0, sodomain, sotype, soprotocol, on = 1;
	int first;
	struct timeval timeo;
	uint64_t sobufsize;

	/* make sure mbuf constants are set up */
	if (!nfs_mbuf_mhlen) {
		nfs_mbuf_init();
	}

	sock_gettype(so, &sodomain, &sotype, &soprotocol);

	/* There should be only one UDP socket for each of IPv4 and IPv6 */
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
		mbuf_freem(mynam);
		return EEXIST;
	}
	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
		mbuf_freem(mynam);
		return EEXIST;
	}

	/* Set protocol options and reserve some space (for UDP). */
	if (sotype == SOCK_STREAM) {
		/* Reject TCP connections from addresses no export allows. */
		error = nfsrv_check_exports_allow_address(mynam);
		if (error) {
			log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
			mbuf_freem(mynam);
			return error;
		}
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	}
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	/* Set socket buffer sizes for UDP/TCP */
	sobufsize = (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : NFSRV_TCPSOCKBUF;
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sobufsize, sizeof(sobufsize));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_SNDBUF to %llu error(s) %d\n", sobufsize, error);
	}

	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &sobufsize, sizeof(sobufsize));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_RCVBUF to %llu error(s) %d\n", sobufsize, error);
	}
	sock_nointerrupt(so, 0);

	/*
	 * Set socket send/receive timeouts.
	 * Receive timeout shouldn't matter, but setting the send timeout
	 * will make sure that an unresponsive client can't hang the server.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 1;
	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_RCVTIMEO error(s) %d\n", error);
	}

	timeo.tv_sec = 30;
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_SNDTIMEO error(s) %d\n", error);
	}

	/* Allocate and initialize the per-socket server state. */
	slp = kalloc_type(struct nfsrv_sock, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_rw_init(&slp->ns_rwlock, &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
	lck_mtx_init(&slp->ns_wgmutex, &nfsrv_slp_mutex_group, LCK_ATTR_NULL);

	lck_mtx_lock(&nfsd_mutex);

	if (soprotocol == IPPROTO_UDP) {
		if (sodomain == AF_INET) {
			/* There should be only one UDP/IPv4 socket */
			/* Re-check under the lock: another thread may have raced us. */
			if (nfsrv_udpsock) {
				lck_mtx_unlock(&nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return EEXIST;
			}
			nfsrv_udpsock = slp;
		}
		if (sodomain == AF_INET6) {
			/* There should be only one UDP/IPv6 socket */
			if (nfsrv_udp6sock) {
				lck_mtx_unlock(&nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return EEXIST;
			}
			nfsrv_udp6sock = slp;
		}
	}

	/* add the socket to the list */
	first = TAILQ_EMPTY(&nfsrv_socklist);
	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
	if (sotype == SOCK_STREAM) {
		nfsrv_sock_tcp_cnt++;
		/* Sanitize the idle timeout sysctl: non-negative, >= the minimum. */
		if (nfsrv_sock_idle_timeout < 0) {
			nfsrv_sock_idle_timeout = 0;
		}
		if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
			nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
		}
		/*
		 * Possibly start or stop the idle timer. We only start the idle timer when
		 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
		 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
		 * the number of connections.
		 */
		if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
			if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
				/* Timer no longer needed: cancel it if armed. */
				if (nfsrv_idlesock_timer_on) {
					thread_call_cancel(nfsrv_idlesock_timer_call);
					nfsrv_idlesock_timer_on = 0;
				}
			} else {
				struct nfsrv_sock *old_slp;
				struct timeval now;
				microuptime(&now);
				time_t time_to_wait = nfsrv_sock_idle_timeout;
				/*
				 * Get the oldest tcp socket and calculate the
				 * earliest time for the next idle timer to fire
				 * based on the possibly updated nfsrv_sock_idle_timeout
				 */
				TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
					if (old_slp->ns_sotype == SOCK_STREAM) {
						time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
						if (time_to_wait < 1) {
							time_to_wait = 1;
						}
						break;
					}
				}
				/*
				 * If we have a timer scheduled, but if its going to fire too late,
				 * turn it off.
				 */
				if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
					thread_call_cancel(nfsrv_idlesock_timer_call);
					nfsrv_idlesock_timer_on = 0;
				}
				/* Schedule the idle thread if it isn't already */
				if (!nfsrv_idlesock_timer_on) {
					nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
					nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
				}
			}
		}
	}

	sock_retain(so); /* grab a retain count on the socket */
	slp->ns_so = so;
	slp->ns_sotype = sotype;
	slp->ns_nam = mynam;

	/* set up the socket up-call */
	nfsrv_uc_addsock(slp, first);

	/* mark that the socket is not in the nfsrv_sockwg list */
	slp->ns_wgq.tqe_next = SLPNOLIST;

	slp->ns_flag = SLP_VALID | SLP_NEEDQ;

	/* Wake an nfsd to service the new socket, then release the lock. */
	nfsrv_wakenfsd(slp);
	lck_mtx_unlock(&nfsd_mutex);

	return 0;
}
856 
857 /*
858  * nfssvc_nfsd()
859  *
860  * nfsd theory of operation:
861  *
862  * The first nfsd thread stays in user mode accepting new TCP connections
863  * which are then added via the "addsock" call.  The rest of the nfsd threads
864  * simply call into the kernel and remain there in a loop handling NFS
865  * requests until killed by a signal.
866  *
867  * There's a list of nfsd threads (nfsd_head).
868  * There's an nfsd queue that contains only those nfsds that are
869  *   waiting for work to do (nfsd_queue).
870  *
871  * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
872  *   managing the work on the sockets:
873  *   nfsrv_sockwait - sockets w/new data waiting to be worked on
874  *   nfsrv_sockwork - sockets being worked on which may have more work to do
875  *   nfsrv_sockwg -- sockets which have pending write gather data
876  * When a socket receives data, if it is not currently queued, it
877  *   will be placed at the end of the "wait" queue.
878  * Whenever a socket needs servicing we make sure it is queued and
879  *   wake up a waiting nfsd (if there is one).
880  *
881  * nfsds will service at most 8 requests from the same socket before
882  *   defecting to work on another socket.
883  * nfsds will defect immediately if there are any sockets in the "wait" queue
884  * nfsds looking for a socket to work on check the "wait" queue first and
885  *   then check the "work" queue.
886  * When an nfsd starts working on a socket, it removes it from the head of
887  *   the queue it's currently on and moves it to the end of the "work" queue.
888  * When nfsds are checking the queues for work, any sockets found not to
889  *   have any work are simply dropped from the queue.
890  *
891  */
/*
 * Body of an nfsd server thread.
 *
 * Loop: find a socket with pending work (checking the "wait" queue before
 * the "work" queue, per the scheme described above), dequeue an RPC request
 * via nfsrv_dorec(), dispatch it through nfsrv_procs[] (or the write-gather
 * path for v3 WRITEs), and send the reply.  The thread exits when the
 * server is shutting down (nfsd_thread_max <= 0), when it wakes up idle
 * with more threads running than configured, or when msleep() returns a
 * fatal (non-timeout) error.
 *
 * Returns 0 on a normal exit, or the errno that terminated the loop.
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep = NULL;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	time_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	/* quiet "may be used uninitialized" warnings; both are set before use */
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	/* Register this thread; the first nfsd initializes the request cache. */
	nfsd = kalloc_type(struct nfsd, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_mtx_lock(&nfsd_mutex);
	if (nfsd_thread_count++ == 0) {
		nfsrv_initcache();              /* Init the server request cache */
	}
	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(&nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(&nfsd_mutex);
			/* sleep until a socket has work, we time out, or we're signalled */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, &nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					if (error == EWOULDBLOCK) {
						/* just the 5 second timeout: re-check conditions */
						continue;
					}
					goto done;
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				microuptime(&now);
				slp->ns_timestamp = now.tv_sec;
				/* We keep the socket list in least recently used order for reaping idle sockets */
				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(&nfsd_mutex);
			if (!slp) {
				continue;
			}
			/* pull in any newly received data and dequeue the next request */
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN) {
					nfsrv_zapsock(slp);
				}
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) {  // RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM) {
						nfsrv_zapsock(slp); // drop connection
					}
				}
				writes_todo = 0;
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					/* no new request, but gathered writes may be due */
					microuptime(&now);
					cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			/* error or socket went invalid: toss the request, drop the socket */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp) {
				nfsrv_slpderef(slp);
			}
			if (nfsd_thread_max <= 0) {
				break;
			}
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			/* nd_nam2 is the sender address for datagram requests */
			if (nd->nd_nam2) {
				nd->nd_nam = nd->nd_nam2;
			} else {
				nd->nd_nam = slp->ns_nam;
			}

			/* check the duplicate request cache for a previously generated reply */
			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET) {
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				} else if (saddr->sa_family == AF_INET6) {
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				}
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					/* non-reserved port: reject with a weak-auth error */
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}
		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				if (nd && (nd->nd_vers == NFS_VER3)) {
					procrastinate = nfsrv_wg_delay_v3;
				} else {
					procrastinate = nfsrv_wg_delay;
				}
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				/* WRITEs may be delayed for gathering; everything else dispatches now */
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				} else {
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				}
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n",
						    nd->nd_procnum, error);
					}
					break;
				}
				if (error) {
					OSAddAtomic64(1, &nfsrvstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsrvstats.srvrpccntv3[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				OS_FALLTHROUGH;

			case RC_REPLY:
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n", siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error) {
					panic("nfsd setrcvif failed: %d", error);
				}
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error) {
						/* record mark: high bit = last fragment, low 31 bits = length */
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
					}
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				if (error == EINTR || error == ERESTART) {
					/* thread is being signalled: clean up and exit */
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr)) {
						kauth_cred_unref(&nd->nd_cr);
					}
					if (nd->nd_gss_context) {
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					}
					NFS_ZFREE(nfsrv_descript_zone, nd);
					nfsrv_slpderef(slp);
					lck_mtx_lock(&nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* duplicate request still in progress: drop it silently */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			}
			;
			opcnt++;
			/* free the request descriptor and everything it references */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/* stay on this socket for up to 8 ops, unless others are waiting */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) {  // RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM) {
					nfsrv_zapsock(slp); // drop connection
				}
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(&nfsd_mutex);
done:
	/* called with nfsd_mutex held; the last thread out cleans up the server */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	kfree_type(struct nfsd, nfsd);
	if (--nfsd_thread_count == 0) {
		nfsrv_cleanup();
	}
	lck_mtx_unlock(&nfsd_mutex);
	return error;
}
1281 
1282 int
nfssvc_export(user_addr_t argp)1283 nfssvc_export(user_addr_t argp)
1284 {
1285 	int error = 0, is_64bit;
1286 	struct user_nfs_export_args unxa;
1287 	vfs_context_t ctx = vfs_context_current();
1288 
1289 	is_64bit = vfs_context_is64bit(ctx);
1290 
1291 	/* copy in pointers to path and export args */
1292 	if (is_64bit) {
1293 		error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1294 	} else {
1295 		struct nfs_export_args tnxa;
1296 		error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1297 		if (error == 0) {
1298 			/* munge into LP64 version of nfs_export_args structure */
1299 			unxa.nxa_fsid = tnxa.nxa_fsid;
1300 			unxa.nxa_expid = tnxa.nxa_expid;
1301 			unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1302 			unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1303 			unxa.nxa_flags = tnxa.nxa_flags;
1304 			unxa.nxa_netcount = tnxa.nxa_netcount;
1305 			unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1306 		}
1307 	}
1308 	if (error) {
1309 		return error;
1310 	}
1311 
1312 	error = nfsrv_export(&unxa, ctx);
1313 
1314 	return error;
1315 }
1316 
/*
 * Return per-export statistics to user space.
 *
 * argp points to a two-entry iovec: iov[0] is the caller's output buffer
 * and iov[1] points to a length word used to report the required buffer
 * size.  On success the output buffer holds one nfs_export_stat_desc
 * header followed by as many nfs_export_stat_rec records as fit.
 *
 * Returns 0 on success, ENOMEM if the caller's buffer is too small for
 * the descriptor, or a copyin/copyout errno.
 */
int
nfssvc_exportstats(proc_t p, user_addr_t argp)
{
	int error = 0;
	uint pos;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;
	struct nfs_export_stat_desc stat_desc = {};
	struct nfs_export_stat_rec statrec;
	uint numExports, totlen, count;
	size_t numRecs;
	user_addr_t oldp, newlenp;
	user_size_t oldlen, newlen;
	struct user_iovec iov[2];

	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
	if (error) {
		return error;
	}

	oldp = iov[0].iov_base;
	oldlen = iov[0].iov_len;
	newlenp = iov[1].iov_base;
	newlen = iov[1].iov_len;

	/* setup export stat descriptor */
	stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;

	if (!nfsrv_is_initialized()) {
		/* server not running: report zero records and descriptor-only size */
		stat_desc.rec_count = 0;
		if (oldp && (oldlen >= sizeof(struct nfs_export_stat_desc))) {
			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
		}
		size_t stat_desc_size = sizeof(struct nfs_export_stat_desc);
		if (!error && newlenp && newlen >= sizeof(stat_desc_size)) {
			error = copyout(&stat_desc_size, newlenp, sizeof(stat_desc_size));
		}
		return error;
	}

	/* Count the number of exported directories */
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	numExports = 0;
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
	numExports += 1;

	/* update stat descriptor's export record count */
	stat_desc.rec_count = numExports;

	/* calculate total size of required buffer */
	totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));

	/* Check caller's buffer */
	if (oldp == 0 || newlenp == 0) {
		lck_rw_done(&nfsrv_export_rwlock);
		/* indicate required buffer len */
		if (newlenp && newlen >= sizeof(totlen)) {
			error = copyout(&totlen, newlenp, sizeof(totlen));
		}
		return error;
	}

	/* We require the caller's buffer to be at least large enough to hold the descriptor */
	if (oldlen < sizeof(struct nfs_export_stat_desc) || newlen < sizeof(totlen)) {
		lck_rw_done(&nfsrv_export_rwlock);
		/* indicate required buffer len */
		if (newlenp && newlen >= sizeof(totlen)) {
			(void)copyout(&totlen, newlenp, sizeof(totlen));
		}
		return ENOMEM;
	}

	/* indicate required buffer len */
	error = copyout(&totlen, newlenp, sizeof(totlen));
	if (error) {
		lck_rw_done(&nfsrv_export_rwlock);
		return error;
	}

	/* check if export table is empty */
	if (!numExports) {
		lck_rw_done(&nfsrv_export_rwlock);
		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
		return error;
	}

	/* calculate how many actual export stat records fit into caller's buffer */
	numRecs = (totlen - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);

	if (!numRecs) {
		/* caller's buffer can only accommodate descriptor */
		lck_rw_done(&nfsrv_export_rwlock);
		stat_desc.rec_count = 0;
		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
		return error;
	}

	/* adjust to actual number of records to copyout to caller's buffer */
	if (numRecs > numExports) {
		numRecs = numExports;
	}

	/* set actual number of records we are returning */
	stat_desc.rec_count = numRecs;

	/* first copy out the stat descriptor */
	pos = 0;
	error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
	if (error) {
		lck_rw_done(&nfsrv_export_rwlock);
		return error;
	}
	pos += sizeof(struct nfs_export_stat_desc);

	/* Loop through exported directories */
	count = 0;
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
			if (count >= numRecs) {
				break;
			}

			/* build exported filesystem path */
			memset(statrec.path, 0, sizeof(statrec.path));
			snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
			    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
			    nx->nx_path);

			/* build the 64-bit export stat counters */
			statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
			    nx->nx_stats.ops.lo;
			statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
			    nx->nx_stats.bytes_read.lo;
			statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
			    nx->nx_stats.bytes_written.lo;
			error = copyout(&statrec, oldp + pos, sizeof(statrec));
			if (error) {
				lck_rw_done(&nfsrv_export_rwlock);
				return error;
			}
			/* advance buffer position */
			pos += sizeof(statrec);
		}
	}
	lck_rw_done(&nfsrv_export_rwlock);

	return error;
}
1466 
/*
 * Return active-user statistics to user space.
 *
 * argp points to a two-entry iovec: iov[0] is the caller's output buffer
 * and iov[1] points to a length word used to report the required buffer
 * size.  The buffer is filled with an nfs_user_stat_desc header followed,
 * per export, by one nfs_user_stat_path_rec and then one
 * nfs_user_stat_user_rec per active user node.  Records are copied out
 * only while space remains, but bytes_total always accumulates the full
 * required size.
 *
 * Returns 0 on success, ENOMEM if the buffer cannot hold the descriptor,
 * or a copyin/copyout errno.
 */
int
nfssvc_userstats(proc_t p, user_addr_t argp)
{
	int error = 0;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;
	struct nfs_active_user_list *ulist;
	struct nfs_user_stat_desc ustat_desc = {};
	struct nfs_user_stat_node *unode, *unode_next;
	struct nfs_user_stat_user_rec ustat_rec;
	struct nfs_user_stat_path_rec upath_rec;
	uint bytes_total, recs_copied, pos;
	size_t bytes_avail;
	user_addr_t oldp, newlenp;
	user_size_t oldlen, newlen;
	struct user_iovec iov[2];

	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
	if (error) {
		return error;
	}

	oldp = iov[0].iov_base;
	oldlen = iov[0].iov_len;
	newlenp = iov[1].iov_base;
	newlen = iov[1].iov_len;

	/* init structures used for copying out of kernel */
	ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
	ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
	upath_rec.rec_type = NFS_USER_STAT_PATH_REC;

	/* initialize counters */
	bytes_total = sizeof(struct nfs_user_stat_desc);
	bytes_avail  = oldlen;
	recs_copied = 0;

	if (!nfsrv_is_initialized()) { /* NFS server not initialized, so no stats */
		goto ustat_skip;
	}

	/* reclaim old expired user nodes */
	nfsrv_active_user_list_reclaim();

	/* reserve space for the buffer descriptor */
	if (bytes_avail >= sizeof(struct nfs_user_stat_desc)) {
		bytes_avail -= sizeof(struct nfs_user_stat_desc);
	} else {
		bytes_avail = 0;
	}

	/* put buffer position past the buffer descriptor */
	pos = sizeof(struct nfs_user_stat_desc);

	/* Loop through exported directories */
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
			/* copy out path */
			if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
				memset(upath_rec.path, 0, sizeof(upath_rec.path));
				snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
				    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
				    nx->nx_path);

				error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
				if (error) {
					/* punt */
					goto ustat_done;
				}

				pos += sizeof(struct nfs_user_stat_path_rec);
				bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
				recs_copied++;
			} else {
				/* Caller's buffer is exhausted */
				bytes_avail = 0;
			}

			/* keep accumulating the required size even when out of space */
			bytes_total += sizeof(struct nfs_user_stat_path_rec);

			/* Scan through all user nodes of this export */
			ulist = &nx->nx_user_list;
			lck_mtx_lock(&ulist->user_mutex);
			for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
				unode_next = TAILQ_NEXT(unode, lru_link);

				/* copy out node if there is space */
				if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
					/* prepare a user stat rec for copying out */
					ustat_rec.uid = unode->uid;
					memset(&ustat_rec.sock, 0, sizeof(ustat_rec.sock));
					bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
					ustat_rec.ops = unode->ops;
					ustat_rec.bytes_read = unode->bytes_read;
					ustat_rec.bytes_written = unode->bytes_written;
					ustat_rec.tm_start = unode->tm_start;
					ustat_rec.tm_last = unode->tm_last;

					error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));

					if (error) {
						/* punt */
						lck_mtx_unlock(&ulist->user_mutex);
						goto ustat_done;
					}

					pos += sizeof(struct nfs_user_stat_user_rec);
					bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
					recs_copied++;
				} else {
					/* Caller's buffer is exhausted */
					bytes_avail = 0;
				}
				bytes_total += sizeof(struct nfs_user_stat_user_rec);
			}
			/* can unlock this export's list now */
			lck_mtx_unlock(&ulist->user_mutex);
		}
	}

ustat_done:
	/* unlock the export table */
	lck_rw_done(&nfsrv_export_rwlock);

ustat_skip:
	/* indicate number of actual records copied */
	ustat_desc.rec_count = recs_copied;

	if (!error) {
		/* check if there was enough room for the buffer descriptor */
		if (oldlen >= sizeof(struct nfs_user_stat_desc)) {
			error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
		} else {
			error = ENOMEM;
		}

		/* always indicate required buffer size */
		if (!error && newlenp && newlen >= sizeof(bytes_total)) {
			error = copyout(&bytes_total, newlenp, sizeof(bytes_total));
		}
	}
	return error;
}
1611 
1612 int
nfssvc_usercount(proc_t p,user_addr_t argp)1613 nfssvc_usercount(proc_t p, user_addr_t argp)
1614 {
1615 	int error;
1616 	user_addr_t oldp, newlenp;
1617 	user_size_t oldlen, newlen;
1618 	struct user_iovec iov[2];
1619 	size_t stat_size = sizeof(nfsrv_user_stat_node_count);
1620 
1621 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1622 	if (error) {
1623 		return error;
1624 	}
1625 
1626 	oldp = iov[0].iov_base;
1627 	oldlen = iov[0].iov_len;
1628 	newlenp = iov[1].iov_base;
1629 	newlen = iov[1].iov_len;
1630 
1631 	if (!oldp) {
1632 		if (newlenp && newlen >= sizeof(stat_size)) {
1633 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1634 		}
1635 		return error;
1636 	}
1637 
1638 	if (oldlen < stat_size) {
1639 		if (newlenp && newlen >= sizeof(stat_size)) {
1640 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1641 		}
1642 		return ENOMEM;
1643 	}
1644 
1645 	if (nfsrv_is_initialized()) {
1646 		/* reclaim old expired user nodes */
1647 		nfsrv_active_user_list_reclaim();
1648 	}
1649 
1650 	error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
1651 
1652 	return error;
1653 }
1654 
1655 int
nfssvc_zerostats(void)1656 nfssvc_zerostats(void)
1657 {
1658 	bzero(&nfsrvstats, sizeof nfsrvstats);
1659 	return 0;
1660 }
1661 
1662 int
nfssvc_srvstats(proc_t p,user_addr_t argp)1663 nfssvc_srvstats(proc_t p, user_addr_t argp)
1664 {
1665 	int error;
1666 	user_addr_t oldp, newlenp;
1667 	user_size_t oldlen, newlen;
1668 	struct user_iovec iov[2];
1669 	size_t stat_size = sizeof(nfsrvstats);
1670 
1671 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1672 	if (error) {
1673 		return error;
1674 	}
1675 
1676 	oldp = iov[0].iov_base;
1677 	oldlen = iov[0].iov_len;
1678 	newlenp = iov[1].iov_base;
1679 	newlen = iov[1].iov_len;
1680 
1681 	if (!oldp) {
1682 		if (newlenp && newlen >= sizeof(stat_size)) {
1683 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1684 		}
1685 		return error;
1686 	}
1687 
1688 	if (oldlen < stat_size) {
1689 		if (newlenp && newlen >= sizeof(stat_size)) {
1690 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1691 		}
1692 		return ENOMEM;
1693 	}
1694 
1695 	error = copyout(&nfsrvstats, oldp, stat_size);
1696 	if (error) {
1697 		return error;
1698 	}
1699 
1700 	return 0;
1701 }
1702 
1703 /*
1704  * Shut down a socket associated with an nfsrv_sock structure.
1705  * Should be called with the send lock set, if required.
1706  * The trick here is to increment the sref at the start, so that the nfsds
1707  * will stop using it and clear ns_flag at the end so that it will not be
1708  * reassigned during cleanup.
1709  */
void
nfsrv_zapsock(struct nfsrv_sock *slp)
{
	socket_t so;

	/* already zapped: nothing to do */
	if ((slp->ns_flag & SLP_VALID) == 0) {
		return;
	}
	/* clearing SLP_VALID (with the other flags) stops nfsds from using it */
	slp->ns_flag &= ~SLP_ALLFLAGS;

	so = slp->ns_so;
	if (so == NULL) {
		return;
	}

	/* disable the socket up-call and shut down both directions */
	sock_setupcall(so, NULL, NULL);
	sock_shutdown(so, SHUT_RDWR);

	/*
	 * Remove from the up-call queue
	 */
	nfsrv_uc_dequeue(slp);
}
1733 
1734 /*
1735  * cleanup and release a server socket structure.
1736  */
void
nfsrv_slpfree(struct nfsrv_sock *slp)
{
	struct nfsrv_descript *nwp, *nnwp;

	/* release the underlying socket */
	if (slp->ns_so) {
		sock_release(slp->ns_so);
		slp->ns_so = NULL;
	}
	/* give back this socket's share of the unprocessed-RPC accounting */
	if (slp->ns_recslen) {
		OSAddAtomic(-slp->ns_recslen, &nfsrv_unprocessed_rpc_current);
	}
	/* free any mbufs still attached (address, raw data, records, fragment) */
	if (slp->ns_nam) {
		mbuf_free(slp->ns_nam);
	}
	if (slp->ns_raw) {
		mbuf_freem(slp->ns_raw);
	}
	if (slp->ns_rec) {
		mbuf_freem(slp->ns_rec);
	}
	if (slp->ns_frag) {
		mbuf_freem(slp->ns_frag);
	}
	slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
	slp->ns_reccnt = 0;

	/* tear down any request descriptors still linked on the socket's ns_tq list */
	for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
		nnwp = nwp->nd_tq.le_next;
		LIST_REMOVE(nwp, nd_tq);
		nfsm_chain_cleanup(&nwp->nd_nmreq);
		if (nwp->nd_mrep) {
			mbuf_freem(nwp->nd_mrep);
		}
		if (nwp->nd_nam2) {
			mbuf_freem(nwp->nd_nam2);
		}
		if (IS_VALID_CRED(nwp->nd_cr)) {
			kauth_cred_unref(&nwp->nd_cr);
		}
		if (nwp->nd_gss_context) {
			nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
		}
		NFS_ZFREE(nfsrv_descript_zone, nwp);
	}
	LIST_INIT(&slp->ns_tq);

	/* destroy the locks and free the structure itself */
	lck_rw_destroy(&slp->ns_rwlock, &nfsrv_slp_rwlock_group);
	lck_mtx_destroy(&slp->ns_wgmutex, &nfsrv_slp_mutex_group);
	kfree_type(struct nfsrv_sock, slp);
}
1788 
1789 /*
1790  * Derefence a server socket structure. If it has no more references and
1791  * is no longer valid, you can throw it away.
1792  */
static void
nfsrv_slpderef_locked(struct nfsrv_sock *slp)
{
	/* called with nfsd_mutex held (see nfsrv_slpderef and other callers) */
	lck_rw_lock_exclusive(&slp->ns_rwlock);
	slp->ns_sref--;

	if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
		/* still referenced or still valid: keep the socket around */
		if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
			/* remove socket from queue since there's no work */
			if (slp->ns_flag & SLP_WAITQ) {
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			} else {
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			}
			slp->ns_flag &= ~SLP_QUEUED;
		}
		lck_rw_done(&slp->ns_rwlock);
		return;
	}

	/* This socket is no longer valid, so we'll get rid of it */

	if (slp->ns_flag & SLP_QUEUED) {
		if (slp->ns_flag & SLP_WAITQ) {
			TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
		} else {
			TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
		}
		slp->ns_flag &= ~SLP_QUEUED;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* unlink from the global socket list and drop the TCP socket count */
	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
	if (slp->ns_sotype == SOCK_STREAM) {
		nfsrv_sock_tcp_cnt--;
	}

	/* now remove from the write gather socket list */
	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
		slp->ns_wgq.tqe_next = SLPNOLIST;
	}
	nfsrv_slpfree(slp);
}
1837 
/*
 * Drop a reference on a server socket: take the nfsd mutex and let
 * nfsrv_slpderef_locked() do the work (it may free the socket).
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	lck_mtx_lock(&nfsd_mutex);
	nfsrv_slpderef_locked(slp);
	lck_mtx_unlock(&nfsd_mutex);
}
1845 
1846 /*
1847  * Check periodically for idle sockest if needed and
1848  * zap them.
1849  */
void
nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
{
	struct nfsrv_sock *slp, *tslp;
	struct timeval now;
	time_t time_to_wait = nfsrv_sock_idle_timeout;

	microuptime(&now);
	lck_mtx_lock(&nfsd_mutex);

	/* Turn off the timer if we're supposed to and get out */
	if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
		nfsrv_sock_idle_timeout = 0;
	}
	/* no need to reap while the TCP socket count is within bounds */
	if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
		nfsrv_idlesock_timer_on = 0;
		lck_mtx_unlock(&nfsd_mutex);
		return;
	}

	TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
		lck_rw_lock_exclusive(&slp->ns_rwlock);
		/* Skip udp and referenced sockets */
		if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
			lck_rw_done(&slp->ns_rwlock);
			continue;
		}
		/*
		 * If this is the first non-referenced socket that hasn't idled out,
		 * use its time stamp to calculate the earliest time in the future
		 * to start the next invocation of the timer. Since the nfsrv_socklist
		 * is sorted oldest access to newest. Once we find the first one,
		 * we're done and break out of the loop.
		 */
		if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
		    nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
			time_to_wait -= now.tv_sec - slp->ns_timestamp;
			if (time_to_wait < 1) {
				time_to_wait = 1;
			}
			lck_rw_done(&slp->ns_rwlock);
			break;
		}
		/*
		 * Bump the ref count. nfsrv_slpderef below will destroy
		 * the socket, since nfsrv_zapsock has closed it.
		 */
		slp->ns_sref++;
		nfsrv_zapsock(slp);
		lck_rw_done(&slp->ns_rwlock);
		nfsrv_slpderef_locked(slp);
	}

	/* Start ourself back up */
	nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
	/* Remember when the next timer will fire for nfssvc_addsock. */
	nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
	lck_mtx_unlock(&nfsd_mutex);
}
1909 
1910 /*
1911  * Clean up the data structures for the server.
1912  */
1913 void
nfsrv_cleanup(void)1914 nfsrv_cleanup(void)
1915 {
1916 	struct nfsrv_sock *slp, *nslp;
1917 	struct timeval now;
1918 #if CONFIG_FSE
1919 	struct nfsrv_fmod *fp, *nfp;
1920 	int i;
1921 #endif
1922 
1923 	microuptime(&now);
1924 	for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1925 		nslp = TAILQ_NEXT(slp, ns_chain);
1926 		lck_rw_lock_exclusive(&slp->ns_rwlock);
1927 		slp->ns_sref++;
1928 		if (slp->ns_flag & SLP_VALID) {
1929 			nfsrv_zapsock(slp);
1930 		}
1931 		lck_rw_done(&slp->ns_rwlock);
1932 		nfsrv_slpderef_locked(slp);
1933 	}
1934 #
1935 #if CONFIG_FSE
1936 	/*
1937 	 * Flush pending file write fsevents
1938 	 */
1939 	lck_mtx_lock(&nfsrv_fmod_mutex);
1940 	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1941 		for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1942 			/*
1943 			 * Fire off the content modified fsevent for each
1944 			 * entry, remove it from the list, and free it.
1945 			 */
1946 			if (nfsrv_fsevents_enabled) {
1947 				fp->fm_context.vc_thread = current_thread();
1948 				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1949 				    FSE_ARG_VNODE, fp->fm_vp,
1950 				    FSE_ARG_DONE);
1951 			}
1952 			vnode_put(fp->fm_vp);
1953 			kauth_cred_unref(&fp->fm_context.vc_ucred);
1954 			nfp = LIST_NEXT(fp, fm_link);
1955 			LIST_REMOVE(fp, fm_link);
1956 			kfree_type(struct nfsrv_fmod, fp);
1957 		}
1958 	}
1959 	nfsrv_fmod_pending = 0;
1960 	lck_mtx_unlock(&nfsrv_fmod_mutex);
1961 #endif
1962 
1963 	nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
1964 
1965 	nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
1966 
1967 	nfsrv_cleancache();     /* And clear out server cache */
1968 
1969 	nfsrv_udpsock = NULL;
1970 	nfsrv_udp6sock = NULL;
1971 }
1972 
1973 #endif /* CONFIG_NFS_SERVER */
1974