xref: /xnu-12377.1.9/bsd/nfs/nfs_syscalls.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1989, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Rick Macklem at The University of Guelph.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
65  * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66  */
67 
68 #include <nfs/nfs_conf.h>
69 
70 /*
71  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72  * support for mandatory and extensible security protections.  This notice
73  * is included in support of clause 2.2 (b) of the Apple Public License,
74  * Version 2.0.
75  */
76 
77 #include <sys/file_internal.h>
78 #include <sys/param.h>
79 #include <sys/mbuf.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/uio_internal.h>
82 #include <sys/sysctl.h>
83 #include <sys/socketvar.h>
84 #include <sys/sysproto.h>
85 #include <sys/fsevents.h>
86 #include <kern/task.h>
87 
88 #include <security/audit/audit.h>
89 
90 #include <netinet/in.h>
91 #include <netinet/tcp.h>
92 #include <nfs/xdr_subs.h>
93 #include <nfs/rpcv2.h>
94 #include <nfs/nfsproto.h>
95 #include <nfs/nfs.h>
96 #include <nfs/nfsm_subs.h>
97 #include <nfs/nfsrvcache.h>
98 #include <nfs/nfs_gss.h>
99 #if CONFIG_MACF
100 #include <security/mac_framework.h>
101 #endif
102 
#if CONFIG_NFS_SERVER

/* Per-procedure dispatch table for the NFS server (defined elsewhere). */
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];

/* Write-gather delay knobs (v2 and v3); exposed via sysctl below. */
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* When non-zero, require clients to use a reserved source port. */
static int nfsrv_require_resv_port = 0;
/* Time the idle-socket timer is scheduled to fire; 0 = timer off. */
static time_t  nfsrv_idlesock_timer_on = 0;
/* Number of TCP sockets currently on the server socket list. */
static int nfsrv_sock_tcp_cnt = 0;
/* Floor (seconds) enforced on nfsrv_sock_idle_timeout in nfssvc_addsock(). */
#define NFSD_MIN_IDLE_TIMEOUT 30
/* Seconds an idle TCP socket may linger before reaping; 0 disables. */
static int nfsrv_sock_idle_timeout = 3600; /* One hour */

/* Forward declarations for the nfssvc() dispatch helpers and socket teardown. */
int     nfssvc_export(user_addr_t argp);
int     nfssvc_exportstats(proc_t p, user_addr_t argp);
int     nfssvc_userstats(proc_t p, user_addr_t argp);
int     nfssvc_usercount(proc_t p, user_addr_t argp);
int     nfssvc_zerostats(void);
int     nfssvc_srvstats(proc_t p, user_addr_t argp);
int     nfssvc_nfsd(void);
int     nfssvc_addsock(socket_t, mbuf_t);
void    nfsrv_zapsock(struct nfsrv_sock *);
void    nfsrv_slpderef(struct nfsrv_sock *);
void    nfsrv_slpfree(struct nfsrv_sock *);

#endif /* CONFIG_NFS_SERVER */
129 
/*
 * sysctl stuff
 *
 * Registers the vfs.generic.nfs tree and, when the server is built in,
 * the vfs.generic.nfs.server.* tuning/statistics knobs.
 */
SYSCTL_DECL(_vfs_generic);
SYSCTL_EXTENSIBLE_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");

#if CONFIG_NFS_SERVER
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_debug_ctl, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, unprocessed_rpc_current, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_unprocessed_rpc_current, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, unprocessed_rpc_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_unprocessed_rpc_max, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
#ifdef NFS_UC_Q_DEBUG
/* Upcall-service debug counters, compiled in only with NFS_UC_Q_DEBUG. */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif
#endif /* CONFIG_NFS_SERVER */
164 
/* NFS hooks */

/*
 * NFS hooks variables.
 *
 * Table of callbacks filled in by nfs_register_hooks() and cleared by
 * nfs_unregister_hooks().  Both entries start out NULL; the wrapper
 * functions below treat a NULL entry as a successful no-op.
 */
struct nfs_hooks_in nfsh = {
	.f_vinvalbuf      = NULL,
	.f_buf_page_inval = NULL
};
172 
173 /* NFS hooks registration functions */
174 void
nfs_register_hooks(struct nfs_hooks_in * inh,struct nfs_hooks_out * outh)175 nfs_register_hooks(struct nfs_hooks_in *inh, struct nfs_hooks_out *outh)
176 {
177 	if (inh) {
178 		nfsh.f_vinvalbuf = inh->f_vinvalbuf;
179 		nfsh.f_buf_page_inval = inh->f_buf_page_inval;
180 	}
181 
182 	if (outh) {
183 		outh->f_get_bsdthreadtask_info = get_bsdthreadtask_info;
184 	}
185 }
186 
/*
 * Clear all registered NFS hooks.  Zeroing nfsh returns both function
 * pointers to NULL, restoring the no-op behavior of the wrappers below.
 */
void
nfs_unregister_hooks(void)
{
	memset(&nfsh, 0, sizeof(nfsh));
}
192 
193 /* NFS hooks wrappers */
194 int
nfs_vinvalbuf(vnode_t vp,int flags,vfs_context_t ctx,int intrflg)195 nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
196 {
197 	if (nfsh.f_vinvalbuf == NULL) {
198 		return 0;
199 	}
200 
201 	return nfsh.f_vinvalbuf(vp, flags, ctx, intrflg);
202 }
203 
204 int
nfs_buf_page_inval(vnode_t vp,off_t offset)205 nfs_buf_page_inval(vnode_t vp, off_t offset)
206 {
207 	if (nfsh.f_buf_page_inval == NULL) {
208 		return 0;
209 	}
210 
211 	return nfsh.f_buf_page_inval(vp, offset);
212 }
213 
214 #if !CONFIG_NFS_SERVER
215 #define __no_nfs_server_unused      __unused
216 #else
217 #define __no_nfs_server_unused      /* nothing */
218 #endif
219 
220 /*
221  * NFS server system calls
222  * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
223  */
224 
225 #if CONFIG_NFS_SERVER
226 static struct nfs_exportfs *
nfsrv_find_exportfs(const char * ptr)227 nfsrv_find_exportfs(const char *ptr)
228 {
229 	struct nfs_exportfs *nxfs;
230 
231 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
232 		if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
233 			break;
234 		}
235 	}
236 	if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
237 		nxfs = NULL;
238 	}
239 
240 	return nxfs;
241 }
242 
/*
 * Return a pointer into 'path' just past the export file system's mount
 * prefix 'nxfs_path', i.e. the per-export remainder of the path.
 *
 * If 'nxfs_path' is not a literal prefix of 'path' (e.g. because the
 * export was recorded under a firmlink path), resolve 'nxfs_path' to
 * its file system root's real, non-firmlink path and retry the prefix
 * match against that.
 *
 * Returns NULL on lookup failure or if neither form prefixes 'path'.
 */
static char *
nfsrv_export_remainder(char *path, char *nxfs_path)
{
	int error;
	vnode_t vp, rvp;
	struct nameidata nd;
	size_t pathbuflen = MAXPATHLEN;
	char real_mntonname[MAXPATHLEN];

	/* Fast path: the export path literally prefixes the request path. */
	if (!strncmp(path, nxfs_path, strlen(nxfs_path))) {
		return path + strlen(nxfs_path);
	}

	/* Look up the export path to find its mount. */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs_path), vfs_context_current());
	error = namei(&nd);
	if (error) {
		return NULL;
	}

	nameidone(&nd);
	vp = nd.ni_vp;

	/* Get the root vnode of the file system the export lives on. */
	error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
	vnode_put(vp);
	if (error) {
		return NULL;
	}

	/* Resolve the root to its real (non-firmlink) mount path. */
	error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen, VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
	vnode_put(rvp);

	if (error || strncmp(path, real_mntonname, strlen(real_mntonname))) {
		return NULL;
	}

	return path + strlen(real_mntonname);
}
/*
 * Get file handle system call
 *
 * Translates the user-supplied pathname (uap->fname) into an NFS file
 * handle, provided the path lies within a currently exported file
 * system.  The caller passes the desired handle size in *uap->fhp on
 * entry; an fhandle_t-sized prefix of the constructed handle is copied
 * back out to uap->fhp on success.  Requires super-user.
 */
int
getfh(
	proc_t p __no_nfs_server_unused,
	struct getfh_args *uap __no_nfs_server_unused,
	__unused int *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	int error, fhlen = 0, fidlen;
	struct nameidata nd;
	char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
	size_t pathlen;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;

	/*
	 * Must be super user
	 */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	/* Copy in the pathname and the requested handle length. */
	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
	if (!error) {
		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
	}
	if (error) {
		return error;
	}
	/* limit fh size to length specified (or v3 size by default) */
	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
		fhlen = NFSV3_MAX_FH_SIZE;
	}
	/* Space left for the file id after the export handle header. */
	fidlen = fhlen - sizeof(struct nfs_exphandle);

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	/* Look up the path; on success we hold an iocount on vp. */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
	error = namei(&nd);
	if (error) {
		return error;
	}
	nameidone(&nd);

	vp = nd.ni_vp;

	// find exportfs that matches f_mntonname
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	ptr = vfs_statfs(vnode_mount(vp))->f_mntonname;
	if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
		/*
		 * The f_mntonname might be a firmlink path.  Resolve
		 * it into a physical path and try again.
		 */
		size_t pathbuflen = MAXPATHLEN;
		vnode_t rvp;

		error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
		if (error) {
			goto out;
		}
		error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
		    VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
		vnode_put(rvp);
		if (error) {
			goto out;
		}
		ptr = real_mntonname;
		nxfs = nfsrv_find_exportfs(ptr);
	}
	if (nxfs == NULL) {
		error = EINVAL;
		goto out;
	}
	// find export that best matches remainder of path
	if ((ptr = nfsrv_export_remainder(path, nxfs->nxfs_path)) == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Skip any leading slashes on the remainder. */
	while (*ptr && (*ptr == '/')) {
		ptr++;
	}
	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
		size_t len = strlen(nx->nx_path);
		if (len == 0) { // we've hit the export entry for the root directory
			break;
		}
		if (!strncmp(nx->nx_path, ptr, len)) {
			break;
		}
	}
	if (!nx) {
		error = EINVAL;
		goto out;
	}

	/* Build the handle: export header (network byte order) + file id. */
	bzero(&nfh, sizeof(nfh));
	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
	nfh.nfh_xh.nxh_flags = 0;
	nfh.nfh_xh.nxh_reserved = 0;
	nfh.nfh_len = fidlen;
	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
	/* Reject if the file system produced a fid larger than requested. */
	if (nfh.nfh_len > (uint32_t)fidlen) {
		error = EOVERFLOW;
	}
	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
	nfh.nfh_len += sizeof(nfh.nfh_xh);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

out:
	/* Common exit: drop the export lock and the vnode iocount. */
	lck_rw_done(&nfsrv_export_rwlock);
	vnode_put(vp);
	if (error) {
		return error;
	}
	/*
	 * At first blush, this may appear to leak a kernel stack
	 * address, but the copyout() never reaches &nfh.nfh_fhp
	 * (sizeof(fhandle_t) < sizeof(nfh)).
	 */
	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
	return error;
}
414 
extern const struct fileops vnops;

/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * Copies in the handle from uap->u_fhp, maps it to a vnode via the
 * export tables, performs the vn_open-equivalent permission checks and
 * open, optionally applies an O_EXLOCK/O_SHLOCK advisory lock, and
 * returns the new file descriptor through *retval.
 */
int
fhopen(proc_t p __no_nfs_server_unused,
    struct fhopen_args *uap __no_nfs_server_unused,
    int32_t *retval __no_nfs_server_unused)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct flock lf;
	struct fileproc *fp, *nfp;
	int fmode, error, type;
	int indx;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	/*
	 * Must be super user
	 */
	error = suser(vfs_context_ucred(ctx), 0);
	if (error) {
		return error;
	}

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
		return EINVAL;
	}

	/* Copy in the handle length first so we can bound the second copyin. */
	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
	if (error) {
		return error;
	}
	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
		return EINVAL;
	}
	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
	if (error) {
		return error;
	}
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

	lck_rw_lock_shared(&nfsrv_export_rwlock);
	/* now give me my vnode, it gets returned to me with a reference */
	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
	lck_rw_done(&nfsrv_export_rwlock);
	if (error) {
		if (error == NFSERR_TRYLATER) {
			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
		}
		return error;
	}

	/*
	 * From now on we have to make sure not
	 * to forget about the vnode.
	 * Any error that causes an abort must vnode_put(vp).
	 * Just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vnode_vtype(vp) == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/* disallow write operations on directories */
	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
		error = EISDIR;
		goto bad;
	}

#if CONFIG_MACF
	if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
		goto bad;
	}
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		action |= KAUTH_VNODE_WRITE_DATA;
	}
	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
		goto bad;
	}

	if ((error = VNOP_OPEN(vp, fmode, ctx))) {
		goto bad;
	}
	/* Take a usecount ref so the vnode stays open-able for the fd. */
	if ((error = vnode_ref_ext(vp, fmode, 0))) {
		goto bad;
	}

	/*
	 * end of vn_open code
	 */

	// starting here... error paths should call vn_close/vnode_put
	if ((error = falloc(p, &nfp, &indx)) != 0) {
		vn_close(vp, fmode & FMASK, ctx);
		goto bad;
	}
	fp = nfp;

	/* Wire the vnode into the new fileproc. */
	fp->fp_glob->fg_flag = fmode & FMASK;
	fp->fp_glob->fg_ops = &vnops;
	fp_set_data(fp, vp);

	// XXX do we really need to support this with fhopen()?
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK) {
			lf.l_type = F_WRLCK;
		} else {
			lf.l_type = F_RDLCK;
		}
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0) {
			type |= F_WAIT;
		}
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
			struct vfs_context context = *vfs_context_current();
			/* Modify local copy (to not damage thread copy) */
			context.vc_ucred = fp->fp_glob->fg_cred;

			/* Lock failed: undo the open and free the fd slot. */
			vn_close(vp, fp->fp_glob->fg_flag, &context);
			fp_free(p, indx, fp);
			goto bad;
		}
		fp->fp_glob->fg_flag |= FWASLOCKED;
	}

	/* Drop the iocount; the fd's usecount ref keeps the vnode. */
	vnode_put(vp);

	/* Publish the descriptor in the process fd table. */
	proc_fdlock(p);
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;
	return 0;

bad:
	vnode_put(vp);
	return error;
}
584 
/*
 * NFS server pseudo system call
 *
 * Multiplexes the server control operations selected by uap->flag:
 * NFSSVC_ADDSOCK hands a socket (plus optional client address) to the
 * server, NFSSVC_NFSD turns the calling thread into an nfsd service
 * thread, and the remaining flags dispatch to export/statistics
 * helpers.  NFSSVC_NFSD and NFSSVC_ADDSOCK require super-user.
 */
int
nfssvc(proc_t p __no_nfs_server_unused,
    struct nfssvc_args *uap __no_nfs_server_unused,
    __unused int *retval)
{
	mbuf_t nam;
	struct user_nfsd_args user_nfsdarg;
	socket_t so;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/*
	 * Must be super user for NFSSVC_NFSD and NFSSVC_ADDSOCK operations.
	 */
	if ((uap->flag & (NFSSVC_NFSD | NFSSVC_ADDSOCK)) && ((error = proc_suser(p)))) {
		return error;
	}
#if CONFIG_MACF
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error) {
		return error;
	}
#endif

	/* make sure NFS server data structures have been initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		/* Copy in the nfsd args, widening from the 32-bit layout if needed. */
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
		} else {
			struct nfsd_args    tmp_args;
			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
			if (error == 0) {
				user_nfsdarg.sock = tmp_args.sock;
				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
				user_nfsdarg.namelen = tmp_args.namelen;
			}
		}
		if (error) {
			return error;
		}
		/* get the socket */
		error = file_socket(user_nfsdarg.sock, &so);
		if (error) {
			return error;
		}
		/* Get the client address for connected sockets. */
		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
			nam = NULL;
		} else {
			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
			if (error) {
				/* drop the iocount file_socket() grabbed on the file descriptor */
				file_drop(user_nfsdarg.sock);
				return error;
			}
		}
		/*
		 * nfssvc_addsock() will grab a retain count on the socket
		 * to keep the socket from being closed when nfsd closes its
		 * file descriptor for it.
		 */
		error = nfssvc_addsock(so, nam);
		/* drop the iocount file_socket() grabbed on the file descriptor */
		file_drop(user_nfsdarg.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else if (uap->flag & NFSSVC_EXPORTSTATS) {
		error = nfssvc_exportstats(p, uap->argp);
	} else if (uap->flag & NFSSVC_USERSTATS) {
		error = nfssvc_userstats(p, uap->argp);
	} else if (uap->flag & NFSSVC_USERCOUNT) {
		error = nfssvc_usercount(p, uap->argp);
	} else if (uap->flag & NFSSVC_ZEROSTATS) {
		error = nfssvc_zerostats();
	} else if (uap->flag & NFSSVC_SRVSTATS) {
		error = nfssvc_srvstats(p, uap->argp);
	} else {
		error = EINVAL;
	}
	/* Signal-driven exits from the service loop are not user errors. */
	if (error == EINTR || error == ERESTART) {
		error = 0;
	}
	return error;
}
677 
/*
 * Adds a socket to the list for servicing by nfsds.
 *
 * Takes ownership of 'mynam' (the client's address as an mbuf): it is
 * freed on every error path and stored in slp->ns_nam on success.
 * Grabs its own retain count on 'so' so the socket survives nfsd
 * closing its file descriptor.  Returns 0 or an errno value.
 */
int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
	struct nfsrv_sock *slp;
	int error = 0, sodomain, sotype, soprotocol, on = 1;
	int first;
	struct timeval timeo;
	uint64_t sobufsize;

	/* make sure mbuf constants are set up */
	if (!nfs_mbuf_mhlen) {
		nfs_mbuf_init();
	}

	sock_gettype(so, &sodomain, &sotype, &soprotocol);

	/* There should be only one UDP socket for each of IPv4 and IPv6 */
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
		mbuf_freem(mynam);
		return EEXIST;
	}
	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
		mbuf_freem(mynam);
		return EEXIST;
	}

	/* Set protocol options and reserve some space (for UDP). */
	if (sotype == SOCK_STREAM) {
		/* TCP: reject the connection if no export allows this address. */
		error = nfsrv_check_exports_allow_address(mynam);
		if (error) {
			log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
			mbuf_freem(mynam);
			return error;
		}
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	}
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	/* Set socket buffer sizes for UDP/TCP */
	sobufsize = (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : NFSRV_TCPSOCKBUF;
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sobufsize, sizeof(sobufsize));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_SNDBUF to %llu error(s) %d\n", sobufsize, error);
	}

	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &sobufsize, sizeof(sobufsize));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_RCVBUF to %llu error(s) %d\n", sobufsize, error);
	}
	sock_nointerrupt(so, 0);

	/*
	 * Set socket send/receive timeouts.
	 * Receive timeout shouldn't matter, but setting the send timeout
	 * will make sure that an unresponsive client can't hang the server.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 1;
	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_RCVTIMEO error(s) %d\n", error);
	}

	timeo.tv_sec = 30;
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_SNDTIMEO error(s) %d\n", error);
	}

	/* Allocate and initialize the per-socket server record. */
	slp = kalloc_type(struct nfsrv_sock, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_rw_init(&slp->ns_rwlock, &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
	lck_mtx_init(&slp->ns_wgmutex, &nfsrv_slp_mutex_group, LCK_ATTR_NULL);

	lck_mtx_lock(&nfsd_mutex);

	if (soprotocol == IPPROTO_UDP) {
		if (sodomain == AF_INET) {
			/* There should be only one UDP/IPv4 socket */
			if (nfsrv_udpsock) {
				/* Lost the race: another thread installed one first. */
				lck_mtx_unlock(&nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return EEXIST;
			}
			nfsrv_udpsock = slp;
		}
		if (sodomain == AF_INET6) {
			/* There should be only one UDP/IPv6 socket */
			if (nfsrv_udp6sock) {
				lck_mtx_unlock(&nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return EEXIST;
			}
			nfsrv_udp6sock = slp;
		}
	}

	/* add the socket to the list */
	first = TAILQ_EMPTY(&nfsrv_socklist);
	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
	if (sotype == SOCK_STREAM) {
		nfsrv_sock_tcp_cnt++;
		/* Sanitize the sysctl-settable idle timeout before using it. */
		if (nfsrv_sock_idle_timeout < 0) {
			nfsrv_sock_idle_timeout = 0;
		}
		if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
			nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
		}
		/*
		 * Possibly start or stop the idle timer. We only start the idle timer when
		 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
		 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
		 * the number of connections.
		 */
		if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
			if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
				if (nfsrv_idlesock_timer_on) {
					thread_call_cancel(nfsrv_idlesock_timer_call);
					nfsrv_idlesock_timer_on = 0;
				}
			} else {
				struct nfsrv_sock *old_slp;
				struct timeval now;
				microuptime(&now);
				time_t time_to_wait = nfsrv_sock_idle_timeout;
				/*
				 * Get the oldest tcp socket and calculate the
				 * earliest time for the next idle timer to fire
				 * based on the possibly updated nfsrv_sock_idle_timeout
				 */
				TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
					if (old_slp->ns_sotype == SOCK_STREAM) {
						time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
						if (time_to_wait < 1) {
							time_to_wait = 1;
						}
						break;
					}
				}
				/*
				 * If we have a timer scheduled, but if its going to fire too late,
				 * turn it off.
				 */
				if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
					thread_call_cancel(nfsrv_idlesock_timer_call);
					nfsrv_idlesock_timer_on = 0;
				}
				/* Schedule the idle thread if it isn't already */
				if (!nfsrv_idlesock_timer_on) {
					nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
					nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
				}
			}
		}
	}

	sock_retain(so); /* grab a retain count on the socket */
	slp->ns_so = so;
	slp->ns_sotype = sotype;
	slp->ns_nam = mynam;

	/* set up the socket up-call */
	nfsrv_uc_addsock(slp, first);

	/* mark that the socket is not in the nfsrv_sockwg list */
	slp->ns_wgq.tqe_next = SLPNOLIST;

	slp->ns_flag = SLP_VALID | SLP_NEEDQ;

	/* Wake an nfsd to start servicing the new socket. */
	nfsrv_wakenfsd(slp);
	lck_mtx_unlock(&nfsd_mutex);

	return 0;
}
858 
859 /*
860  * nfssvc_nfsd()
861  *
862  * nfsd theory of operation:
863  *
864  * The first nfsd thread stays in user mode accepting new TCP connections
865  * which are then added via the "addsock" call.  The rest of the nfsd threads
866  * simply call into the kernel and remain there in a loop handling NFS
867  * requests until killed by a signal.
868  *
869  * There's a list of nfsd threads (nfsd_head).
870  * There's an nfsd queue that contains only those nfsds that are
871  *   waiting for work to do (nfsd_queue).
872  *
873  * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
874  *   managing the work on the sockets:
875  *   nfsrv_sockwait - sockets w/new data waiting to be worked on
876  *   nfsrv_sockwork - sockets being worked on which may have more work to do
877  *   nfsrv_sockwg -- sockets which have pending write gather data
878  * When a socket receives data, if it is not currently queued, it
879  *   will be placed at the end of the "wait" queue.
880  * Whenever a socket needs servicing we make sure it is queued and
881  *   wake up a waiting nfsd (if there is one).
882  *
883  * nfsds will service at most 8 requests from the same socket before
884  *   defecting to work on another socket.
885  * nfsds will defect immediately if there are any sockets in the "wait" queue
886  * nfsds looking for a socket to work on check the "wait" queue first and
887  *   then check the "work" queue.
888  * When an nfsd starts working on a socket, it removes it from the head of
889  *   the queue it's currently on and moves it to the end of the "work" queue.
890  * When nfsds are checking the queues for work, any sockets found not to
891  *   have any work are simply dropped from the queue.
892  *
893  */
/*
 * Main service loop for an nfsd thread.
 *
 * Registers this thread in nfsd_head, then loops forever: find a socket
 * with work (wait queue first, then work queue), pull a request off it,
 * dispatch through the duplicate-request cache, send the reply, and
 * clean up.  Returns only on server shutdown (nfsd_thread_max <= 0),
 * a signal delivered during msleep(), or when there are more threads
 * than configured and no work to do.  The last thread out tears down
 * the server state via nfsrv_cleanup().
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep = NULL;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	time_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	/* Register this thread: allocate its nfsd record and link it in. */
	nfsd = kalloc_type(struct nfsd, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_mtx_lock(&nfsd_mutex);
	if (nfsd_thread_count++ == 0) {
		nfsrv_initcache();              /* Init the server request cache */
	}
	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(&nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(&nfsd_mutex);
			/* Sleep until some socket has work; wake every 5s to re-check thread count. */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, &nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					/* Waker clears NFSD_WAITING; if still set, dequeue ourselves. */
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					if (error == EWOULDBLOCK) {
						/* timeout — loop to re-check whether we are still needed */
						continue;
					}
					/* interrupted by a signal — exit */
					goto done;
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break;
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				microuptime(&now);
				slp->ns_timestamp = now.tv_sec;
				/* We keep the socket list in least recently used order for reaping idle sockets */
				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(&nfsd_mutex);
			if (!slp) {
				continue;
			}
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
					/* socket has data pending; pull it in before parsing records */
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN) {
					nfsrv_zapsock(slp);
				}
				/* try to dequeue one complete RPC record into nd */
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) {  // RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM) {
						nfsrv_zapsock(slp); // drop connection
					}
				}
				writes_todo = 0;
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					/* no new request, but gathered writes may be due */
					microuptime(&now);
					cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			/* error or dead socket: discard any request and drop our socket ref */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp) {
				nfsrv_slpderef(slp);
			}
			if (nfsd_thread_max <= 0) {
				break;
			}
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			/* nd_nam2 is set for connectionless (UDP) requests */
			if (nd->nd_nam2) {
				nd->nd_nam = nd->nd_nam2;
			} else {
				nd->nd_nam = slp->ns_nam;
			}

			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mtod(nd->nd_nam, struct sockaddr*);

				if (saddr->sa_family == AF_INET) {
					port = ntohs((SIN(saddr))->sin_port);
				} else if (saddr->sa_family == AF_INET6) {
					port = ntohs((SIN6(saddr))->sin6_port);
				}
				/* non-reserved source port: reject with AUTH_TOOWEAK (NULL proc exempt) */
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}
		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				if (nd && (nd->nd_vers == NFS_VER3)) {
					procrastinate = nfsrv_wg_delay_v3;
				} else {
					procrastinate = nfsrv_wg_delay;
				}
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				/* NOTE: when writes_todo is set, nd may be NULL (short-circuit avoids deref) */
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				} else {
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				}
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n",
						    nd->nd_procnum, error);
					}
					break;
				}
				if (error) {
					OSAddAtomic64(1, &nfsrvstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsrvstats.srvrpccntv3[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				OS_FALLTHROUGH;

			case RC_REPLY:
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n", siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error) {
					panic("nfsd setrcvif failed: %d", error);
				}
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error) {
						/* high bit marks the final fragment of the record */
						*mtod(m, u_int32_t *) = htonl(0x80000000 | siz);
					}
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				if (error == EINTR || error == ERESTART) {
					/* thread is being torn down: free the request and bail out */
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr)) {
						kauth_cred_unref(&nd->nd_cr);
					}
					if (nd->nd_gss_context) {
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					}
					NFS_ZFREE(nfsrv_descript_zone, nd);
					nfsrv_slpderef(slp);
					lck_mtx_lock(&nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* duplicate request still in progress — silently drop it */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			}
			;
			opcnt++;
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				NFS_ZFREE(nfsrv_descript_zone, nd);
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/* serve at most 8 requests per socket before checking other sockets (see block comment above) */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) {  // RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM) {
					nfsrv_zapsock(slp); // drop connection
				}
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(&nfsd_mutex);
done:
	/* exit path: nfsd_mutex held; unregister this thread (last one cleans up) */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	kfree_type(struct nfsd, nfsd);
	if (--nfsd_thread_count == 0) {
		nfsrv_cleanup();
	}
	lck_mtx_unlock(&nfsd_mutex);
	return error;
}
1283 
1284 int
nfssvc_export(user_addr_t argp)1285 nfssvc_export(user_addr_t argp)
1286 {
1287 	int error = 0, is_64bit;
1288 	struct user_nfs_export_args unxa;
1289 	vfs_context_t ctx = vfs_context_current();
1290 
1291 	is_64bit = vfs_context_is64bit(ctx);
1292 
1293 	/* copy in pointers to path and export args */
1294 	if (is_64bit) {
1295 		error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1296 	} else {
1297 		struct nfs_export_args tnxa;
1298 		error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1299 		if (error == 0) {
1300 			/* munge into LP64 version of nfs_export_args structure */
1301 			unxa.nxa_fsid = tnxa.nxa_fsid;
1302 			unxa.nxa_expid = tnxa.nxa_expid;
1303 			unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1304 			unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1305 			unxa.nxa_flags = tnxa.nxa_flags;
1306 			unxa.nxa_netcount = tnxa.nxa_netcount;
1307 			unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1308 		}
1309 	}
1310 	if (error) {
1311 		return error;
1312 	}
1313 
1314 	error = nfsrv_export(&unxa, ctx);
1315 
1316 	return error;
1317 }
1318 
1319 int
nfssvc_exportstats(proc_t p,user_addr_t argp)1320 nfssvc_exportstats(proc_t p, user_addr_t argp)
1321 {
1322 	int error = 0;
1323 	uint pos;
1324 	struct nfs_exportfs *nxfs;
1325 	struct nfs_export *nx;
1326 	struct nfs_export_stat_desc stat_desc = {};
1327 	struct nfs_export_stat_rec statrec;
1328 	uint numExports, totlen, count;
1329 	size_t numRecs;
1330 	user_addr_t oldp, newlenp;
1331 	user_size_t oldlen, newlen;
1332 	struct user_iovec iov[2];
1333 
1334 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1335 	if (error) {
1336 		return error;
1337 	}
1338 
1339 	oldp = iov[0].iov_base;
1340 	oldlen = iov[0].iov_len;
1341 	newlenp = iov[1].iov_base;
1342 	newlen = iov[1].iov_len;
1343 
1344 	/* setup export stat descriptor */
1345 	stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
1346 
1347 	if (!nfsrv_is_initialized()) {
1348 		stat_desc.rec_count = 0;
1349 		if (oldp && (oldlen >= sizeof(struct nfs_export_stat_desc))) {
1350 			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1351 		}
1352 		size_t stat_desc_size = sizeof(struct nfs_export_stat_desc);
1353 		if (!error && newlenp && newlen >= sizeof(stat_desc_size)) {
1354 			error = copyout(&stat_desc_size, newlenp, sizeof(stat_desc_size));
1355 		}
1356 		return error;
1357 	}
1358 
1359 	/* Count the number of exported directories */
1360 	lck_rw_lock_shared(&nfsrv_export_rwlock);
1361 	numExports = 0;
1362 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
1363 	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
1364 	numExports += 1;
1365 
1366 	/* update stat descriptor's export record count */
1367 	stat_desc.rec_count = numExports;
1368 
1369 	/* calculate total size of required buffer */
1370 	totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
1371 
1372 	/* Check caller's buffer */
1373 	if (oldp == 0 || newlenp == 0) {
1374 		lck_rw_done(&nfsrv_export_rwlock);
1375 		/* indicate required buffer len */
1376 		if (newlenp && newlen >= sizeof(totlen)) {
1377 			error = copyout(&totlen, newlenp, sizeof(totlen));
1378 		}
1379 		return error;
1380 	}
1381 
1382 	/* We require the caller's buffer to be at least large enough to hold the descriptor */
1383 	if (oldlen < sizeof(struct nfs_export_stat_desc) || newlen < sizeof(totlen)) {
1384 		lck_rw_done(&nfsrv_export_rwlock);
1385 		/* indicate required buffer len */
1386 		if (newlenp && newlen >= sizeof(totlen)) {
1387 			(void)copyout(&totlen, newlenp, sizeof(totlen));
1388 		}
1389 		return ENOMEM;
1390 	}
1391 
1392 	/* indicate required buffer len */
1393 	error = copyout(&totlen, newlenp, sizeof(totlen));
1394 	if (error) {
1395 		lck_rw_done(&nfsrv_export_rwlock);
1396 		return error;
1397 	}
1398 
1399 	/* check if export table is empty */
1400 	if (!numExports) {
1401 		lck_rw_done(&nfsrv_export_rwlock);
1402 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1403 		return error;
1404 	}
1405 
1406 	/* calculate how many actual export stat records fit into caller's buffer */
1407 	numRecs = (totlen - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
1408 
1409 	if (!numRecs) {
1410 		/* caller's buffer can only accomodate descriptor */
1411 		lck_rw_done(&nfsrv_export_rwlock);
1412 		stat_desc.rec_count = 0;
1413 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1414 		return error;
1415 	}
1416 
1417 	/* adjust to actual number of records to copyout to caller's buffer */
1418 	if (numRecs > numExports) {
1419 		numRecs = numExports;
1420 	}
1421 
1422 	/* set actual number of records we are returning */
1423 	stat_desc.rec_count = numRecs;
1424 
1425 	/* first copy out the stat descriptor */
1426 	pos = 0;
1427 	error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
1428 	if (error) {
1429 		lck_rw_done(&nfsrv_export_rwlock);
1430 		return error;
1431 	}
1432 	pos += sizeof(struct nfs_export_stat_desc);
1433 
1434 	/* Loop through exported directories */
1435 	count = 0;
1436 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
1437 		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
1438 			if (count >= numRecs) {
1439 				break;
1440 			}
1441 
1442 			/* build exported filesystem path */
1443 			memset(statrec.path, 0, sizeof(statrec.path));
1444 			snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
1445 			    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
1446 			    nx->nx_path);
1447 
1448 			/* build the 64-bit export stat counters */
1449 			statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
1450 			    nx->nx_stats.ops.lo;
1451 			statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
1452 			    nx->nx_stats.bytes_read.lo;
1453 			statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
1454 			    nx->nx_stats.bytes_written.lo;
1455 			error = copyout(&statrec, oldp + pos, sizeof(statrec));
1456 			if (error) {
1457 				lck_rw_done(&nfsrv_export_rwlock);
1458 				return error;
1459 			}
1460 			/* advance buffer position */
1461 			pos += sizeof(statrec);
1462 		}
1463 	}
1464 	lck_rw_done(&nfsrv_export_rwlock);
1465 
1466 	return error;
1467 }
1468 
/*
 * Handle the NFSSVC_USERSTATS command: copy out per-export active-user
 * statistics as a stream of records (a path record for each export,
 * followed by one user record per active user of that export).
 *
 * argp points to a 2-element iovec: iov[0] is the result buffer
 * (oldp/oldlen), iov[1] receives the total required buffer size
 * (newlenp/newlen), sysctl-style.  If the buffer fills up, remaining
 * records are skipped but bytes_total keeps accumulating so the caller
 * learns how big a buffer to retry with.
 */
int
nfssvc_userstats(proc_t p, user_addr_t argp)
{
	int error = 0;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;
	struct nfs_active_user_list *ulist;
	struct nfs_user_stat_desc ustat_desc = {};
	struct nfs_user_stat_node *unode, *unode_next;
	struct nfs_user_stat_user_rec ustat_rec;
	struct nfs_user_stat_path_rec upath_rec;
	uint bytes_total, recs_copied, pos;
	size_t bytes_avail;
	user_addr_t oldp, newlenp;
	user_size_t oldlen, newlen;
	struct user_iovec iov[2];

	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
	if (error) {
		return error;
	}

	oldp = iov[0].iov_base;
	oldlen = iov[0].iov_len;
	newlenp = iov[1].iov_base;
	newlen = iov[1].iov_len;

	/* init structures used for copying out of kernel */
	ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
	ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
	upath_rec.rec_type = NFS_USER_STAT_PATH_REC;

	/* initialize counters */
	bytes_total = sizeof(struct nfs_user_stat_desc);
	bytes_avail  = oldlen;
	recs_copied = 0;

	if (!nfsrv_is_initialized()) { /* NFS server not initialized, so no stats */
		goto ustat_skip;
	}

	/* reclaim old expired user nodes */
	nfsrv_active_user_list_reclaim();

	/* reserve space for the buffer descriptor */
	if (bytes_avail >= sizeof(struct nfs_user_stat_desc)) {
		bytes_avail -= sizeof(struct nfs_user_stat_desc);
	} else {
		bytes_avail = 0;
	}

	/* put buffer position past the buffer descriptor */
	pos = sizeof(struct nfs_user_stat_desc);

	/* Loop through exported directories */
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
			/* copy out path */
			if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
				memset(upath_rec.path, 0, sizeof(upath_rec.path));
				snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
				    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
				    nx->nx_path);

				error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
				if (error) {
					/* punt */
					goto ustat_done;
				}

				pos += sizeof(struct nfs_user_stat_path_rec);
				bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
				recs_copied++;
			} else {
				/* Caller's buffer is exhausted */
				bytes_avail = 0;
			}

			/* required size keeps growing even when the buffer is full */
			bytes_total += sizeof(struct nfs_user_stat_path_rec);

			/* Scan through all user nodes of this export */
			ulist = &nx->nx_user_list;
			lck_mtx_lock(&ulist->user_mutex);
			for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
				unode_next = TAILQ_NEXT(unode, lru_link);

				/* copy out node if there is space */
				if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
					/* prepare a user stat rec for copying out */
					ustat_rec.uid = unode->uid;
					memset(&ustat_rec.sock, 0, sizeof(ustat_rec.sock));
					bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
					ustat_rec.ops = unode->ops;
					ustat_rec.bytes_read = unode->bytes_read;
					ustat_rec.bytes_written = unode->bytes_written;
					ustat_rec.tm_start = unode->tm_start;
					ustat_rec.tm_last = unode->tm_last;

					error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));

					if (error) {
						/* punt — must drop this export's user-list mutex before bailing */
						lck_mtx_unlock(&ulist->user_mutex);
						goto ustat_done;
					}

					pos += sizeof(struct nfs_user_stat_user_rec);
					bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
					recs_copied++;
				} else {
					/* Caller's buffer is exhausted */
					bytes_avail = 0;
				}
				bytes_total += sizeof(struct nfs_user_stat_user_rec);
			}
			/* can unlock this export's list now */
			lck_mtx_unlock(&ulist->user_mutex);
		}
	}

ustat_done:
	/* unlock the export table */
	lck_rw_done(&nfsrv_export_rwlock);

ustat_skip:
	/* indicate number of actual records copied */
	ustat_desc.rec_count = recs_copied;

	if (!error) {
		/* check if there was enough room for the buffer descriptor */
		if (oldlen >= sizeof(struct nfs_user_stat_desc)) {
			error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
		} else {
			error = ENOMEM;
		}

		/* always indicate required buffer size */
		if (!error && newlenp && newlen >= sizeof(bytes_total)) {
			error = copyout(&bytes_total, newlenp, sizeof(bytes_total));
		}
	}
	return error;
}
1613 
1614 int
nfssvc_usercount(proc_t p,user_addr_t argp)1615 nfssvc_usercount(proc_t p, user_addr_t argp)
1616 {
1617 	int error;
1618 	user_addr_t oldp, newlenp;
1619 	user_size_t oldlen, newlen;
1620 	struct user_iovec iov[2];
1621 	size_t stat_size = sizeof(nfsrv_user_stat_node_count);
1622 
1623 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1624 	if (error) {
1625 		return error;
1626 	}
1627 
1628 	oldp = iov[0].iov_base;
1629 	oldlen = iov[0].iov_len;
1630 	newlenp = iov[1].iov_base;
1631 	newlen = iov[1].iov_len;
1632 
1633 	if (!oldp) {
1634 		if (newlenp && newlen >= sizeof(stat_size)) {
1635 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1636 		}
1637 		return error;
1638 	}
1639 
1640 	if (oldlen < stat_size) {
1641 		if (newlenp && newlen >= sizeof(stat_size)) {
1642 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1643 		}
1644 		return ENOMEM;
1645 	}
1646 
1647 	if (nfsrv_is_initialized()) {
1648 		/* reclaim old expired user nodes */
1649 		nfsrv_active_user_list_reclaim();
1650 	}
1651 
1652 	error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
1653 
1654 	return error;
1655 }
1656 
1657 int
nfssvc_zerostats(void)1658 nfssvc_zerostats(void)
1659 {
1660 	bzero(&nfsrvstats, sizeof nfsrvstats);
1661 	return 0;
1662 }
1663 
1664 int
nfssvc_srvstats(proc_t p,user_addr_t argp)1665 nfssvc_srvstats(proc_t p, user_addr_t argp)
1666 {
1667 	int error;
1668 	user_addr_t oldp, newlenp;
1669 	user_size_t oldlen, newlen;
1670 	struct user_iovec iov[2];
1671 	size_t stat_size = sizeof(nfsrvstats);
1672 
1673 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1674 	if (error) {
1675 		return error;
1676 	}
1677 
1678 	oldp = iov[0].iov_base;
1679 	oldlen = iov[0].iov_len;
1680 	newlenp = iov[1].iov_base;
1681 	newlen = iov[1].iov_len;
1682 
1683 	if (!oldp) {
1684 		if (newlenp && newlen >= sizeof(stat_size)) {
1685 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1686 		}
1687 		return error;
1688 	}
1689 
1690 	if (oldlen < stat_size) {
1691 		if (newlenp && newlen >= sizeof(stat_size)) {
1692 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1693 		}
1694 		return ENOMEM;
1695 	}
1696 
1697 	error = copyout(&nfsrvstats, oldp, stat_size);
1698 	if (error) {
1699 		return error;
1700 	}
1701 
1702 	return 0;
1703 }
1704 
1705 /*
1706  * Shut down a socket associated with an nfsrv_sock structure.
1707  * Should be called with the send lock set, if required.
1708  * The trick here is to increment the sref at the start, so that the nfsds
1709  * will stop using it and clear ns_flag at the end so that it will not be
1710  * reassigned during cleanup.
1711  */
/*
 * Invalidate and shut down an NFS server socket.
 *
 * Caller must hold slp->ns_rwlock exclusively (see callers in this
 * file).  Clears SLP_VALID (via ~SLP_ALLFLAGS) so nfsds stop using the
 * socket, detaches the socket up-call, shuts the connection down in
 * both directions, and removes the socket from the up-call queue.
 * The structure itself is freed later, when the last reference is
 * dropped via nfsrv_slpderef().
 */
void
nfsrv_zapsock(struct nfsrv_sock *slp)
{
	socket_t so;

	/* already zapped — nothing to do */
	if ((slp->ns_flag & SLP_VALID) == 0) {
		return;
	}
	/* clear SLP_VALID (and everything else) so the socket can't be reused */
	slp->ns_flag &= ~SLP_ALLFLAGS;

	so = slp->ns_so;
	if (so == NULL) {
		return;
	}

	/* detach the receive up-call before shutting the socket down */
	sock_setupcall(so, NULL, NULL);
	sock_shutdown(so, SHUT_RDWR);

	/*
	 * Remove from the up-call queue
	 */
	nfsrv_uc_dequeue(slp);
}
1735 
1736 /*
1737  * cleanup and release a server socket structure.
1738  */
/*
 * Free an nfsrv_sock and everything it owns: the socket reference, the
 * address and any buffered record/fragment mbufs, every queued write-
 * gather request descriptor (with its credential and GSS context), and
 * finally the locks and the structure itself.
 *
 * Caller must guarantee the socket is no longer referenced (ns_sref == 0)
 * and no longer on any queue — see nfsrv_slpderef_locked().
 */
void
nfsrv_slpfree(struct nfsrv_sock *slp)
{
	struct nfsrv_descript *nwp, *nnwp;

	if (slp->ns_so) {
		sock_release(slp->ns_so);
		slp->ns_so = NULL;
	}
	/* give back this socket's share of the global unprocessed-RPC budget */
	if (slp->ns_recslen) {
		OSAddAtomic(-slp->ns_recslen, &nfsrv_unprocessed_rpc_current);
	}
	if (slp->ns_nam) {
		mbuf_free(slp->ns_nam);
	}
	if (slp->ns_raw) {
		mbuf_freem(slp->ns_raw);
	}
	if (slp->ns_rec) {
		mbuf_freem(slp->ns_rec);
	}
	if (slp->ns_frag) {
		mbuf_freem(slp->ns_frag);
	}
	slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
	slp->ns_reccnt = 0;

	/* free any request descriptors still queued for write gathering */
	for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
		nnwp = nwp->nd_tq.le_next;
		LIST_REMOVE(nwp, nd_tq);
		nfsm_chain_cleanup(&nwp->nd_nmreq);
		if (nwp->nd_mrep) {
			mbuf_freem(nwp->nd_mrep);
		}
		if (nwp->nd_nam2) {
			mbuf_freem(nwp->nd_nam2);
		}
		if (IS_VALID_CRED(nwp->nd_cr)) {
			kauth_cred_unref(&nwp->nd_cr);
		}
		if (nwp->nd_gss_context) {
			nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
		}
		NFS_ZFREE(nfsrv_descript_zone, nwp);
	}
	LIST_INIT(&slp->ns_tq);

	lck_rw_destroy(&slp->ns_rwlock, &nfsrv_slp_rwlock_group);
	lck_mtx_destroy(&slp->ns_wgmutex, &nfsrv_slp_mutex_group);
	kfree_type(struct nfsrv_sock, slp);
}
1790 
1791 /*
 * Dereference a server socket structure. If it has no more references and
1793  * is no longer valid, you can throw it away.
1794  */
/*
 * Drop one reference on a server socket; caller holds nfsd_mutex
 * (required for manipulating the wait/work/socket queues).
 *
 * If the socket is still referenced or still valid, it stays alive;
 * we just dequeue it from the service queues if it has no work left.
 * Otherwise (last reference gone AND socket invalidated by
 * nfsrv_zapsock) the socket is unhooked from every list and freed.
 */
static void
nfsrv_slpderef_locked(struct nfsrv_sock *slp)
{
	lck_rw_lock_exclusive(&slp->ns_rwlock);
	slp->ns_sref--;

	if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
		/* socket stays alive; just tidy up the service queues */
		if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
			/* remove socket from queue since there's no work */
			if (slp->ns_flag & SLP_WAITQ) {
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			} else {
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			}
			slp->ns_flag &= ~SLP_QUEUED;
		}
		lck_rw_done(&slp->ns_rwlock);
		return;
	}

	/* This socket is no longer valid, so we'll get rid of it */

	if (slp->ns_flag & SLP_QUEUED) {
		if (slp->ns_flag & SLP_WAITQ) {
			TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
		} else {
			TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
		}
		slp->ns_flag &= ~SLP_QUEUED;
	}
	lck_rw_done(&slp->ns_rwlock);

	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
	if (slp->ns_sotype == SOCK_STREAM) {
		nfsrv_sock_tcp_cnt--;
	}

	/* now remove from the write gather socket list */
	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
		slp->ns_wgq.tqe_next = SLPNOLIST;
	}
	nfsrv_slpfree(slp);
}
1839 
1840 void
nfsrv_slpderef(struct nfsrv_sock * slp)1841 nfsrv_slpderef(struct nfsrv_sock *slp)
1842 {
1843 	lck_mtx_lock(&nfsd_mutex);
1844 	nfsrv_slpderef_locked(slp);
1845 	lck_mtx_unlock(&nfsd_mutex);
1846 }
1847 
1848 /*
 * Periodically check for idle sockets, if needed, and
 * zap them.
1851  */
/*
 * Periodic timer callout that reaps idle TCP server sockets.
 *
 * Only runs while the TCP socket count exceeds 2 * nfsd_thread_max and
 * an idle timeout is configured; otherwise it disarms itself.  Walks
 * nfsrv_socklist (kept in least-recently-used order) zapping sockets
 * that have been idle past nfsrv_sock_idle_timeout, then re-arms the
 * timer for the next expiry.
 */
void
nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
{
	struct nfsrv_sock *slp, *tslp;
	struct timeval now;
	time_t time_to_wait = nfsrv_sock_idle_timeout;

	microuptime(&now);
	lck_mtx_lock(&nfsd_mutex);

	/* Turn off the timer if we're supposed to and get out */
	if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
		nfsrv_sock_idle_timeout = 0;
	}
	if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
		nfsrv_idlesock_timer_on = 0;
		lck_mtx_unlock(&nfsd_mutex);
		return;
	}

	TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
		lck_rw_lock_exclusive(&slp->ns_rwlock);
		/* Skip udp and referenced sockets */
		if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
			lck_rw_done(&slp->ns_rwlock);
			continue;
		}
		/*
		 * If this is the first non-referenced socket that hasn't idled out,
		 * use its time stamp to calculate the earliest time in the future
		 * to start the next invocation of the timer. Since the nfsrv_socklist
		 * is sorted oldest access to newest. Once we find the first one,
		 * we're done and break out of the loop.
		 */
		if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
		    nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
			time_to_wait -= now.tv_sec - slp->ns_timestamp;
			if (time_to_wait < 1) {
				time_to_wait = 1;
			}
			lck_rw_done(&slp->ns_rwlock);
			break;
		}
		/*
		 * Bump the ref count. nfsrv_slpderef below will destroy
		 * the socket, since nfsrv_zapsock has closed it.
		 */
		slp->ns_sref++;
		nfsrv_zapsock(slp);
		lck_rw_done(&slp->ns_rwlock);
		nfsrv_slpderef_locked(slp);
	}

	/* Start ourself back up */
	nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
	/* Remember when the next timer will fire for nfssvc_addsock. */
	nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
	lck_mtx_unlock(&nfsd_mutex);
}
1911 
1912 /*
1913  * Clean up the data structures for the server.
1914  */
1915 void
nfsrv_cleanup(void)1916 nfsrv_cleanup(void)
1917 {
1918 	struct nfsrv_sock *slp, *nslp;
1919 	struct timeval now;
1920 #if CONFIG_FSE
1921 	struct nfsrv_fmod *fp, *nfp;
1922 	int i;
1923 #endif
1924 
1925 	microuptime(&now);
1926 	for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1927 		nslp = TAILQ_NEXT(slp, ns_chain);
1928 		lck_rw_lock_exclusive(&slp->ns_rwlock);
1929 		slp->ns_sref++;
1930 		if (slp->ns_flag & SLP_VALID) {
1931 			nfsrv_zapsock(slp);
1932 		}
1933 		lck_rw_done(&slp->ns_rwlock);
1934 		nfsrv_slpderef_locked(slp);
1935 	}
1936 #
1937 #if CONFIG_FSE
1938 	/*
1939 	 * Flush pending file write fsevents
1940 	 */
1941 	lck_mtx_lock(&nfsrv_fmod_mutex);
1942 	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1943 		for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1944 			/*
1945 			 * Fire off the content modified fsevent for each
1946 			 * entry, remove it from the list, and free it.
1947 			 */
1948 			if (nfsrv_fsevents_enabled) {
1949 				fp->fm_context.vc_thread = current_thread();
1950 				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1951 				    FSE_ARG_VNODE, fp->fm_vp,
1952 				    FSE_ARG_DONE);
1953 			}
1954 			vnode_put(fp->fm_vp);
1955 			kauth_cred_unref(&fp->fm_context.vc_ucred);
1956 			nfp = LIST_NEXT(fp, fm_link);
1957 			LIST_REMOVE(fp, fm_link);
1958 			kfree_type(struct nfsrv_fmod, fp);
1959 		}
1960 	}
1961 	nfsrv_fmod_pending = 0;
1962 	lck_mtx_unlock(&nfsrv_fmod_mutex);
1963 #endif
1964 
1965 	nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
1966 
1967 	nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
1968 
1969 	nfsrv_cleancache();     /* And clear out server cache */
1970 
1971 	nfsrv_udpsock = NULL;
1972 	nfsrv_udp6sock = NULL;
1973 }
1974 
1975 #endif /* CONFIG_NFS_SERVER */
1976