xref: /xnu-8020.140.41/bsd/nfs/nfs_syscalls.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1989, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Rick Macklem at The University of Guelph.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
65  * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66  */
67 
68 #include <nfs/nfs_conf.h>
69 
70 /*
71  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72  * support for mandatory and extensible security protections.  This notice
73  * is included in support of clause 2.2 (b) of the Apple Public License,
74  * Version 2.0.
75  */
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/filedesc.h>
82 #include <sys/stat.h>
83 #include <sys/vnode_internal.h>
84 #include <sys/mount_internal.h>
85 #include <sys/proc_internal.h> /* for fdflags */
86 #include <sys/uio_internal.h>
87 #include <sys/kauth.h>
88 #include <sys/sysctl.h>
89 #include <sys/ubc.h>
90 #include <sys/uio.h>
91 #include <sys/malloc.h>
92 #include <sys/kpi_mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/socketvar.h>
95 #include <sys/domain.h>
96 #include <sys/protosw.h>
97 #include <sys/fcntl.h>
98 #include <sys/lockf.h>
99 #include <sys/syslog.h>
100 #include <sys/user.h>
101 #include <sys/sysproto.h>
102 #include <sys/kpi_socket.h>
103 #include <sys/fsevents.h>
104 #include <libkern/OSAtomic.h>
105 #include <kern/thread_call.h>
106 #include <kern/task.h>
107 
108 #include <security/audit/audit.h>
109 
110 #include <netinet/in.h>
111 #include <netinet/tcp.h>
112 #include <nfs/xdr_subs.h>
113 #include <nfs/rpcv2.h>
114 #include <nfs/nfsproto.h>
115 #include <nfs/nfs.h>
116 #include <nfs/nfsm_subs.h>
117 #include <nfs/nfsrvcache.h>
118 #include <nfs/nfs_gss.h>
119 #include <nfs/nfsmount.h>
120 #include <nfs/nfsnode.h>
121 #include <nfs/nfs_lock.h>
122 #if CONFIG_MACF
123 #include <security/mac_framework.h>
124 #endif
125 
/* XXX local prototype; nfsiod_terminate() calls this to end the calling thread */
kern_return_t   thread_terminate(thread_t); /* XXX */

#if CONFIG_NFS_SERVER

/* Table of NFS server procedure handlers, defined outside this file. */
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];

/* Defined elsewhere; exported below as the wg_delay / wg_delay_v3 sysctls. */
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* Exported as vfs.generic.nfs.server.require_resv_port. */
static int nfsrv_require_resv_port = 0;
static time_t  nfsrv_idlesock_timer_on = 0;
/* Exported read-only as vfs.generic.nfs.server.nfsd_tcp_connections. */
static int nfsrv_sock_tcp_cnt = 0;
/* NOTE(review): presumably seconds, like the timeout below - confirm at the use site */
#define NFSD_MIN_IDLE_TIMEOUT 30
/* Exported as vfs.generic.nfs.server.nfsd_sock_idle_timeout. */
static int nfsrv_sock_idle_timeout = 3600; /* One hour */

/* Handlers for the individual nfssvc() sub-operations and socket helpers. */
int     nfssvc_export(user_addr_t argp);
int     nfssvc_exportstats(proc_t p, user_addr_t argp);
int     nfssvc_userstats(proc_t p, user_addr_t argp);
int     nfssvc_usercount(proc_t p, user_addr_t argp);
int     nfssvc_zerostats(void);
int     nfssvc_srvstats(proc_t p, user_addr_t argp);
int     nfssvc_nfsd(void);
int     nfssvc_addsock(socket_t, mbuf_t);
void    nfsrv_zapsock(struct nfsrv_sock *);
void    nfsrv_slpderef(struct nfsrv_sock *);
void    nfsrv_slpfree(struct nfsrv_sock *);

#endif /* CONFIG_NFS_SERVER */
154 
/*
 * sysctl registration.
 *
 * An "nfs" node is created under _vfs_generic, with a "client" child (built
 * when CONFIG_NFS_CLIENT is set) and a "server" child (built when
 * CONFIG_NFS_SERVER is set).  Each entry below binds one sysctl name to the
 * kernel variable whose address is passed.  Entries flagged CTLFLAG_RW are
 * writable; entries flagged CTLFLAG_RD are read-only.
 */
SYSCTL_DECL(_vfs_generic);
SYSCTL_EXTENSIBLE_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");

#if CONFIG_NFS_CLIENT
/* NFS client tunables and counters */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, mount_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_mount_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, mount_quick_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_mount_quick_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, split_open_owner, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_split_open_owner, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, tcp_sockbuf, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tcp_sockbuf, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsclnt_debug_ctl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
#if CONFIG_NFS_GSS
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
#endif
#if CONFIG_NFS4
/* string-valued entry backed by the nfs4_default_domain buffer */
SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
#endif
#endif /* CONFIG_NFS_CLIENT */

#if CONFIG_NFS_SERVER
/* NFS server tunables and counters */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_debug_ctl, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
#ifdef NFS_UC_Q_DEBUG
/* upcall queue entries, only built when NFS_UC_Q_DEBUG is defined */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
/* __DECONST strips the qualifier from the counter's address for the read-only entry */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif
#endif /* CONFIG_NFS_SERVER */
220 
/*
 * Parameter annotation used by nfsclnt(): expands to __unused when the NFS
 * client is compiled out (the parameter is then never referenced) and to
 * nothing otherwise.
 */
#if !CONFIG_NFS_CLIENT
#define __no_nfs_client_unused      __unused
#else
#define __no_nfs_client_unused      /* nothing */
#endif
226 
227 int
nfsclnt(__unused proc_t p,struct nfsclnt_args * uap __no_nfs_client_unused,__unused int * retval)228 nfsclnt(
229 	__unused proc_t p,
230 	struct nfsclnt_args *uap __no_nfs_client_unused,
231 	__unused int *retval)
232 {
233 #if CONFIG_NFS_CLIENT
234 	int error;
235 	vnode_t vp;
236 	vfs_context_t ctx = vfs_context_current();
237 
238 	if (nfsclnt_device_add()) {
239 		printf("nfsclnt: unable to open chardev /dev/%s\n", NFSCLNT_DEVICE);
240 	}
241 
242 	if ((error = vnode_lookup("/dev/" NFSCLNT_DEVICE, 0, &vp, ctx))) {
243 		printf("nfsclnt: unable to find /dev/%s, err %d\n", NFSCLNT_DEVICE, error);
244 		return ENOSYS;
245 	}
246 
247 	if ((error = VNOP_IOCTL(vp, uap->flag, (caddr_t)uap->argp, 0, ctx))) {
248 		printf("nfsclnt: ioctl of /dev/%s returned %d\n", NFSCLNT_DEVICE, error);
249 		vnode_put(vp);
250 		return error;
251 	}
252 
253 	vnode_put(vp);
254 	return 0;
255 #else
256 	return ENOSYS;
257 #endif /* CONFIG_NFS_CLIENT */
258 }
259 
260 #if CONFIG_NFS_CLIENT
261 
/*
 * Asynchronous I/O threads for client NFS.
 * They do read-ahead and write-behind operations on the block I/O cache.
 *
 * The pool of up to nfsiod_thread_max threads is launched on demand, and
 * threads exit when unused for a while.  There are as many nfsiod structs as
 * there are nfsiod threads; however, there's no strict tie between a thread
 * and a struct.  Each thread puts an nfsiod on the free list and sleeps on it.
 * When it wakes up, it removes the next struct nfsiod from the work queue and
 * services it.  Then it puts the struct at the head of the free list and
 * sleeps on it.  Async requests pull the next struct nfsiod from the head of
 * the free list, put it on the work queue, and wake whatever thread is
 * waiting on that struct.
 */
275 
276 /*
277  * nfsiod thread exit routine
278  *
279  * Must be called with nfsiod_mutex held so that the
280  * decision to terminate is atomic with the termination.
281  */
282 void
nfsiod_terminate(struct nfsiod * niod)283 nfsiod_terminate(struct nfsiod *niod)
284 {
285 	nfsiod_thread_count--;
286 	lck_mtx_unlock(&nfsiod_mutex);
287 	if (niod) {
288 		kfree_type(struct nfsiod, niod);
289 	} else {
290 		printf("nfsiod: terminating without niod\n");
291 	}
292 	thread_terminate(current_thread());
293 	/*NOTREACHED*/
294 }
295 
296 /* nfsiod thread startup routine */
297 void
nfsiod_thread(void)298 nfsiod_thread(void)
299 {
300 	struct nfsiod *niod;
301 	int error;
302 
303 	niod = kalloc_type(struct nfsiod, Z_WAITOK | Z_ZERO | Z_NOFAIL);
304 	lck_mtx_lock(&nfsiod_mutex);
305 	TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
306 	wakeup(current_thread());
307 	error = msleep0(niod, &nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
308 	/* shouldn't return... so we have an error */
309 	/* remove an old nfsiod struct and terminate */
310 	lck_mtx_lock(&nfsiod_mutex);
311 	if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
312 		TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
313 	}
314 	nfsiod_terminate(niod);
315 	/*NOTREACHED*/
316 }
317 
318 /*
319  * Start up another nfsiod thread.
320  * (unless we're already maxed out and there are nfsiods running)
321  */
322 int
nfsiod_start(void)323 nfsiod_start(void)
324 {
325 	thread_t thd = THREAD_NULL;
326 
327 	lck_mtx_lock(&nfsiod_mutex);
328 	if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
329 		lck_mtx_unlock(&nfsiod_mutex);
330 		return EBUSY;
331 	}
332 	nfsiod_thread_count++;
333 	if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
334 		lck_mtx_unlock(&nfsiod_mutex);
335 		return EBUSY;
336 	}
337 	/* wait for the thread to complete startup */
338 	msleep(thd, &nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
339 	thread_deallocate(thd);
340 	return 0;
341 }
342 
343 /*
344  * Continuation for Asynchronous I/O threads for NFS client.
345  *
346  * Grab an nfsiod struct to work on, do some work, then drop it
347  */
348 int
nfsiod_continue(__unused int error)349 nfsiod_continue(__unused int error)
350 {
351 	struct nfsiod *niod;
352 	struct nfsmount *nmp;
353 	struct nfsreq *req, *treq;
354 	struct nfs_reqqhead iodq;
355 	int morework;
356 
357 	lck_mtx_lock(&nfsiod_mutex);
358 	niod = TAILQ_FIRST(&nfsiodwork);
359 	if (!niod) {
360 		/* there's no work queued up */
361 		/* remove an old nfsiod struct and terminate */
362 		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
363 			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
364 		}
365 		nfsiod_terminate(niod);
366 		/*NOTREACHED*/
367 	}
368 	TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
369 
370 worktodo:
371 	while ((nmp = niod->niod_nmp)) {
372 		if (nmp == NULL) {
373 			niod->niod_nmp = NULL;
374 			break;
375 		}
376 
377 		/*
378 		 * Service this mount's async I/O queue.
379 		 *
380 		 * In order to ensure some level of fairness between mounts,
381 		 * we grab all the work up front before processing it so any
382 		 * new work that arrives will be serviced on a subsequent
383 		 * iteration - and we have a chance to see if other work needs
384 		 * to be done (e.g. the delayed write queue needs to be pushed
385 		 * or other mounts are waiting for an nfsiod).
386 		 */
387 		/* grab the current contents of the queue */
388 		TAILQ_INIT(&iodq);
389 		TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
390 		/* Mark each iod request as being managed by an iod */
391 		TAILQ_FOREACH(req, &iodq, r_achain) {
392 			lck_mtx_lock(&req->r_mtx);
393 			assert(!(req->r_flags & R_IOD));
394 			req->r_flags |= R_IOD;
395 			lck_mtx_unlock(&req->r_mtx);
396 		}
397 		lck_mtx_unlock(&nfsiod_mutex);
398 
399 		/* process the queue */
400 		TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
401 			TAILQ_REMOVE(&iodq, req, r_achain);
402 			req->r_achain.tqe_next = NFSREQNOLIST;
403 			req->r_callback.rcb_func(req);
404 		}
405 
406 		/* now check if there's more/other work to be done */
407 		lck_mtx_lock(&nfsiod_mutex);
408 		morework = !TAILQ_EMPTY(&nmp->nm_iodq);
409 		if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
410 			/*
411 			 * we're going to stop working on this mount but if the
412 			 * mount still needs more work so queue it up
413 			 */
414 			if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) {
415 				TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
416 			}
417 			nmp->nm_niod = NULL;
418 			niod->niod_nmp = NULL;
419 		}
420 	}
421 
422 	/* loop if there's still a mount to work on */
423 	if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
424 		niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
425 		TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
426 		niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
427 	}
428 	if (niod->niod_nmp) {
429 		goto worktodo;
430 	}
431 
432 	/* queue ourselves back up - if there aren't too many threads running */
433 	if (nfsiod_thread_count <= NFSIOD_MAX) {
434 		TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
435 		msleep0(niod, &nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
436 		/* shouldn't return... so we have an error */
437 		/* remove an old nfsiod struct and terminate */
438 		lck_mtx_lock(&nfsiod_mutex);
439 		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
440 			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
441 		}
442 	}
443 	nfsiod_terminate(niod);
444 	/*NOTREACHED*/
445 	return 0;
446 }
447 
448 #endif /* CONFIG_NFS_CLIENT */
449 
450 /* NFS hooks */
451 
/*
 * NFS hooks variable.
 *
 * Holds the currently registered hook functions.  Both start out NULL;
 * nfs_register_hooks() fills them in, nfs_unregister_hooks() clears them,
 * and the nfs_vinvalbuf()/nfs_buf_page_inval() wrappers return 0 while the
 * corresponding hook is NULL.
 */
struct nfs_hooks nfsh = {
	.f_vinvalbuf      = NULL,
	.f_buf_page_inval = NULL
};
457 
458 /* NFS hooks registration functions */
459 void
nfs_register_hooks(struct nfs_hooks * hooks)460 nfs_register_hooks(struct nfs_hooks *hooks)
461 {
462 	nfsh.f_vinvalbuf = hooks->f_vinvalbuf;
463 	nfsh.f_buf_page_inval = hooks->f_buf_page_inval;
464 }
465 
466 void
nfs_unregister_hooks(void)467 nfs_unregister_hooks(void)
468 {
469 	memset(&nfsh, 0, sizeof(nfsh));
470 }
471 
472 /* NFS hooks wrappers */
473 int
nfs_vinvalbuf(vnode_t vp,int flags,vfs_context_t ctx,int intrflg)474 nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
475 {
476 	if (nfsh.f_vinvalbuf == NULL) {
477 		return 0;
478 	}
479 
480 	return nfsh.f_vinvalbuf(vp, flags, ctx, intrflg);
481 }
482 
483 int
nfs_buf_page_inval(vnode_t vp,off_t offset)484 nfs_buf_page_inval(vnode_t vp, off_t offset)
485 {
486 	if (nfsh.f_buf_page_inval == NULL) {
487 		return 0;
488 	}
489 
490 	return nfsh.f_buf_page_inval(vp, offset);
491 }
492 
/*
 * Parameter annotation for the server system calls below: expands to
 * __unused when the NFS server is compiled out and to nothing otherwise.
 */
#if !CONFIG_NFS_SERVER
#define __no_nfs_server_unused      __unused
#else
#define __no_nfs_server_unused      /* nothing */
#endif
498 
499 /*
500  * NFS server system calls
501  * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
502  */
503 
504 #if CONFIG_NFS_SERVER
505 static struct nfs_exportfs *
nfsrv_find_exportfs(const char * ptr)506 nfsrv_find_exportfs(const char *ptr)
507 {
508 	struct nfs_exportfs *nxfs;
509 
510 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
511 		if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
512 			break;
513 		}
514 	}
515 	if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
516 		nxfs = NULL;
517 	}
518 
519 	return nxfs;
520 }
521 
522 #define DATA_VOLUME_MP "/System/Volumes/Data" // PLATFORM_DATA_VOLUME_MOUNT_POINT
523 
524 /*
525  * Get file handle system call
526  */
527 int
getfh(proc_t p __no_nfs_server_unused,struct getfh_args * uap __no_nfs_server_unused,__unused int * retval)528 getfh(
529 	proc_t p __no_nfs_server_unused,
530 	struct getfh_args *uap __no_nfs_server_unused,
531 	__unused int *retval)
532 {
533 	vnode_t vp;
534 	struct nfs_filehandle nfh;
535 	int error, fhlen = 0, fidlen;
536 	struct nameidata nd;
537 	char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
538 	size_t datavol_len = strlen(DATA_VOLUME_MP);
539 	size_t pathlen;
540 	struct nfs_exportfs *nxfs;
541 	struct nfs_export *nx;
542 
543 	/*
544 	 * Must be super user
545 	 */
546 	error = proc_suser(p);
547 	if (error) {
548 		return error;
549 	}
550 
551 	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
552 	if (!error) {
553 		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
554 	}
555 	if (error) {
556 		return error;
557 	}
558 	/* limit fh size to length specified (or v3 size by default) */
559 	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
560 		fhlen = NFSV3_MAX_FH_SIZE;
561 	}
562 	fidlen = fhlen - sizeof(struct nfs_exphandle);
563 
564 	if (!nfsrv_is_initialized()) {
565 		return EINVAL;
566 	}
567 
568 	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
569 	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
570 	error = namei(&nd);
571 	if (error) {
572 		return error;
573 	}
574 	nameidone(&nd);
575 
576 	vp = nd.ni_vp;
577 
578 	// find exportfs that matches f_mntonname
579 	lck_rw_lock_shared(&nfsrv_export_rwlock);
580 	ptr = vfs_statfs(vnode_mount(vp))->f_mntonname;
581 	if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
582 		/*
583 		 * The f_mntonname might be a firmlink path.  Resolve
584 		 * it into a physical path and try again.
585 		 */
586 		int pathbuflen = MAXPATHLEN;
587 		vnode_t rvp;
588 
589 		error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
590 		if (error) {
591 			goto out;
592 		}
593 		error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
594 		    VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
595 		vnode_put(rvp);
596 		if (error) {
597 			goto out;
598 		}
599 		ptr = real_mntonname;
600 		nxfs = nfsrv_find_exportfs(ptr);
601 	}
602 	if (nxfs == NULL) {
603 		error = EINVAL;
604 		goto out;
605 	}
606 	// find export that best matches remainder of path
607 	if (!strncmp(path, nxfs->nxfs_path, strlen(nxfs->nxfs_path))) {
608 		ptr = path + strlen(nxfs->nxfs_path);
609 	} else if (!strncmp(path, DATA_VOLUME_MP, datavol_len) && !strncmp(path + datavol_len, nxfs->nxfs_path, strlen(nxfs->nxfs_path))) {
610 		ptr = path + datavol_len + strlen(nxfs->nxfs_path);
611 	} else {
612 		error = EINVAL;
613 		goto out;
614 	}
615 
616 	while (*ptr && (*ptr == '/')) {
617 		ptr++;
618 	}
619 	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
620 		size_t len = strlen(nx->nx_path);
621 		if (len == 0) { // we've hit the export entry for the root directory
622 			break;
623 		}
624 		if (!strncmp(nx->nx_path, ptr, len)) {
625 			break;
626 		}
627 	}
628 	if (!nx) {
629 		error = EINVAL;
630 		goto out;
631 	}
632 
633 	bzero(&nfh, sizeof(nfh));
634 	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
635 	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
636 	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
637 	nfh.nfh_xh.nxh_flags = 0;
638 	nfh.nfh_xh.nxh_reserved = 0;
639 	nfh.nfh_len = fidlen;
640 	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
641 	if (nfh.nfh_len > (uint32_t)fidlen) {
642 		error = EOVERFLOW;
643 	}
644 	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
645 	nfh.nfh_len += sizeof(nfh.nfh_xh);
646 	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
647 
648 out:
649 	lck_rw_done(&nfsrv_export_rwlock);
650 	vnode_put(vp);
651 	if (error) {
652 		return error;
653 	}
654 	/*
655 	 * At first blush, this may appear to leak a kernel stack
656 	 * address, but the copyout() never reaches &nfh.nfh_fhp
657 	 * (sizeof(fhandle_t) < sizeof(nfh)).
658 	 */
659 	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
660 	return error;
661 }
662 #endif /* CONFIG_NFS_SERVER */
663 
664 #if CONFIG_NFS_SERVER
665 extern const struct fileops vnops;
666 
667 /*
668  * syscall for the rpc.lockd to use to translate a NFS file handle into
669  * an open descriptor.
670  *
671  * warning: do not remove the suser() call or this becomes one giant
672  * security hole.
673  */
674 int
fhopen(proc_t p __no_nfs_server_unused,struct fhopen_args * uap __no_nfs_server_unused,int32_t * retval __no_nfs_server_unused)675 fhopen(proc_t p __no_nfs_server_unused,
676     struct fhopen_args *uap __no_nfs_server_unused,
677     int32_t *retval __no_nfs_server_unused)
678 {
679 	vnode_t vp;
680 	struct nfs_filehandle nfh;
681 	struct nfs_export *nx;
682 	struct nfs_export_options *nxo;
683 	struct flock lf;
684 	struct fileproc *fp, *nfp;
685 	int fmode, error, type;
686 	int indx;
687 	vfs_context_t ctx = vfs_context_current();
688 	kauth_action_t action;
689 
690 	/*
691 	 * Must be super user
692 	 */
693 	error = suser(vfs_context_ucred(ctx), 0);
694 	if (error) {
695 		return error;
696 	}
697 
698 	if (!nfsrv_is_initialized()) {
699 		return EINVAL;
700 	}
701 
702 	fmode = FFLAGS(uap->flags);
703 	/* why not allow a non-read/write open for our lockd? */
704 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
705 		return EINVAL;
706 	}
707 
708 	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
709 	if (error) {
710 		return error;
711 	}
712 	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
713 	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
714 		return EINVAL;
715 	}
716 	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
717 	if (error) {
718 		return error;
719 	}
720 	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
721 
722 	lck_rw_lock_shared(&nfsrv_export_rwlock);
723 	/* now give me my vnode, it gets returned to me with a reference */
724 	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
725 	lck_rw_done(&nfsrv_export_rwlock);
726 	if (error) {
727 		if (error == NFSERR_TRYLATER) {
728 			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
729 		}
730 		return error;
731 	}
732 
733 	/*
734 	 * From now on we have to make sure not
735 	 * to forget about the vnode.
736 	 * Any error that causes an abort must vnode_put(vp).
737 	 * Just set error = err and 'goto bad;'.
738 	 */
739 
740 	/*
741 	 * from vn_open
742 	 */
743 	if (vnode_vtype(vp) == VSOCK) {
744 		error = EOPNOTSUPP;
745 		goto bad;
746 	}
747 
748 	/* disallow write operations on directories */
749 	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
750 		error = EISDIR;
751 		goto bad;
752 	}
753 
754 #if CONFIG_MACF
755 	if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
756 		goto bad;
757 	}
758 #endif
759 
760 	/* compute action to be authorized */
761 	action = 0;
762 	if (fmode & FREAD) {
763 		action |= KAUTH_VNODE_READ_DATA;
764 	}
765 	if (fmode & (FWRITE | O_TRUNC)) {
766 		action |= KAUTH_VNODE_WRITE_DATA;
767 	}
768 	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
769 		goto bad;
770 	}
771 
772 	if ((error = VNOP_OPEN(vp, fmode, ctx))) {
773 		goto bad;
774 	}
775 	if ((error = vnode_ref_ext(vp, fmode, 0))) {
776 		goto bad;
777 	}
778 
779 	/*
780 	 * end of vn_open code
781 	 */
782 
783 	// starting here... error paths should call vn_close/vnode_put
784 	if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
785 		vn_close(vp, fmode & FMASK, ctx);
786 		goto bad;
787 	}
788 	fp = nfp;
789 
790 	fp->fp_glob->fg_flag = fmode & FMASK;
791 	fp->fp_glob->fg_ops = &vnops;
792 	fp_set_data(fp, vp);
793 
794 	// XXX do we really need to support this with fhopen()?
795 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
796 		lf.l_whence = SEEK_SET;
797 		lf.l_start = 0;
798 		lf.l_len = 0;
799 		if (fmode & O_EXLOCK) {
800 			lf.l_type = F_WRLCK;
801 		} else {
802 			lf.l_type = F_RDLCK;
803 		}
804 		type = F_FLOCK;
805 		if ((fmode & FNONBLOCK) == 0) {
806 			type |= F_WAIT;
807 		}
808 		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
809 			struct vfs_context context = *vfs_context_current();
810 			/* Modify local copy (to not damage thread copy) */
811 			context.vc_ucred = fp->fp_glob->fg_cred;
812 
813 			vn_close(vp, fp->fp_glob->fg_flag, &context);
814 			fp_free(p, indx, fp);
815 			goto bad;
816 		}
817 		fp->fp_glob->fg_flag |= FWASLOCKED;
818 	}
819 
820 	vnode_put(vp);
821 
822 	proc_fdlock(p);
823 	procfdtbl_releasefd(p, indx, NULL);
824 	fp_drop(p, indx, fp, 1);
825 	proc_fdunlock(p);
826 
827 	*retval = indx;
828 	return 0;
829 
830 bad:
831 	vnode_put(vp);
832 	return error;
833 }
834 #endif /* CONFIG_NFS_SERVER */
835 
836 #if CONFIG_NFS_SERVER
837 /*
838  * NFS server pseudo system call
839  */
840 int
nfssvc(proc_t p __no_nfs_server_unused,struct nfssvc_args * uap __no_nfs_server_unused,__unused int * retval)841 nfssvc(proc_t p __no_nfs_server_unused,
842     struct nfssvc_args *uap __no_nfs_server_unused,
843     __unused int *retval)
844 {
845 	mbuf_t nam;
846 	struct user_nfsd_args user_nfsdarg;
847 	socket_t so;
848 	int error;
849 
850 	AUDIT_ARG(cmd, uap->flag);
851 
852 	/*
853 	 * Must be super user for NFSSVC_NFSD and NFSSVC_ADDSOCK operations.
854 	 */
855 	if (((uap->flag == NFSSVC_NFSD) || (uap->flag == NFSSVC_ADDSOCK)) && ((error = proc_suser(p)))) {
856 		return error;
857 	}
858 #if CONFIG_MACF
859 	error = mac_system_check_nfsd(kauth_cred_get());
860 	if (error) {
861 		return error;
862 	}
863 #endif
864 
865 	/* make sure NFS server data structures have been initialized */
866 	nfsrv_init();
867 
868 	if (uap->flag & NFSSVC_ADDSOCK) {
869 		if (IS_64BIT_PROCESS(p)) {
870 			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
871 		} else {
872 			struct nfsd_args    tmp_args;
873 			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
874 			if (error == 0) {
875 				user_nfsdarg.sock = tmp_args.sock;
876 				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
877 				user_nfsdarg.namelen = tmp_args.namelen;
878 			}
879 		}
880 		if (error) {
881 			return error;
882 		}
883 		/* get the socket */
884 		error = file_socket(user_nfsdarg.sock, &so);
885 		if (error) {
886 			return error;
887 		}
888 		/* Get the client address for connected sockets. */
889 		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
890 			nam = NULL;
891 		} else {
892 			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
893 			if (error) {
894 				/* drop the iocount file_socket() grabbed on the file descriptor */
895 				file_drop(user_nfsdarg.sock);
896 				return error;
897 			}
898 		}
899 		/*
900 		 * nfssvc_addsock() will grab a retain count on the socket
901 		 * to keep the socket from being closed when nfsd closes its
902 		 * file descriptor for it.
903 		 */
904 		error = nfssvc_addsock(so, nam);
905 		/* drop the iocount file_socket() grabbed on the file descriptor */
906 		file_drop(user_nfsdarg.sock);
907 	} else if (uap->flag & NFSSVC_NFSD) {
908 		error = nfssvc_nfsd();
909 	} else if (uap->flag & NFSSVC_EXPORT) {
910 		error = nfssvc_export(uap->argp);
911 	} else if (uap->flag & NFSSVC_EXPORTSTATS) {
912 		error = nfssvc_exportstats(p, uap->argp);
913 	} else if (uap->flag & NFSSVC_USERSTATS) {
914 		error = nfssvc_userstats(p, uap->argp);
915 	} else if (uap->flag & NFSSVC_USERCOUNT) {
916 		error = nfssvc_usercount(p, uap->argp);
917 	} else if (uap->flag & NFSSVC_ZEROSTATS) {
918 		error = nfssvc_zerostats();
919 	} else if (uap->flag & NFSSVC_SRVSTATS) {
920 		error = nfssvc_srvstats(p, uap->argp);
921 	} else {
922 		error = EINVAL;
923 	}
924 	if (error == EINTR || error == ERESTART) {
925 		error = 0;
926 	}
927 	return error;
928 }
929 #endif /* CONFIG_NFS_SERVER */
930 
931 #if CONFIG_NFS_SERVER
932 
933 /*
934  * Adds a socket to the list for servicing by nfsds.
935  */
936 int
nfssvc_addsock(socket_t so,mbuf_t mynam)937 nfssvc_addsock(socket_t so, mbuf_t mynam)
938 {
939 	struct nfsrv_sock *slp;
940 	int error = 0, sodomain, sotype, soprotocol, on = 1;
941 	int first, sobufsize;
942 	struct timeval timeo;
943 	u_quad_t sbmaxsize;
944 
945 	/* make sure mbuf constants are set up */
946 	if (!nfs_mbuf_mhlen) {
947 		nfs_mbuf_init();
948 	}
949 
950 	sock_gettype(so, &sodomain, &sotype, &soprotocol);
951 
952 	/* There should be only one UDP socket for each of IPv4 and IPv6 */
953 	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
954 		mbuf_freem(mynam);
955 		return EEXIST;
956 	}
957 	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
958 		mbuf_freem(mynam);
959 		return EEXIST;
960 	}
961 
962 	/* Set protocol options and reserve some space (for UDP). */
963 	if (sotype == SOCK_STREAM) {
964 		error = nfsrv_check_exports_allow_address(mynam);
965 		if (error) {
966 			log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
967 			mbuf_freem(mynam);
968 			return error;
969 		}
970 		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
971 	}
972 	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
973 		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
974 	}
975 
976 	/* Calculate maximum supported socket buffers sizes */
977 	sbmaxsize = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
978 
979 	/* Set socket buffer sizes for UDP/TCP */
980 	sobufsize = min(sbmaxsize, (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : NFSRV_TCPSOCKBUF);
981 	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sobufsize, sizeof(sobufsize));
982 	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &sobufsize, sizeof(sobufsize));
983 
984 	if (error) {
985 		log(LOG_INFO, "nfssvc_addsock: socket buffer setting error(s) %d\n", error);
986 		error = 0;
987 	}
988 	sock_nointerrupt(so, 0);
989 
990 	/*
991 	 * Set socket send/receive timeouts.
992 	 * Receive timeout shouldn't matter, but setting the send timeout
993 	 * will make sure that an unresponsive client can't hang the server.
994 	 */
995 	timeo.tv_usec = 0;
996 	timeo.tv_sec = 1;
997 	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
998 	timeo.tv_sec = 30;
999 	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
1000 	if (error) {
1001 		log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
1002 		error = 0;
1003 	}
1004 
1005 	slp = kalloc_type(struct nfsrv_sock, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1006 	lck_rw_init(&slp->ns_rwlock, &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
1007 	lck_mtx_init(&slp->ns_wgmutex, &nfsrv_slp_mutex_group, LCK_ATTR_NULL);
1008 
1009 	lck_mtx_lock(&nfsd_mutex);
1010 
1011 	if (soprotocol == IPPROTO_UDP) {
1012 		if (sodomain == AF_INET) {
1013 			/* There should be only one UDP/IPv4 socket */
1014 			if (nfsrv_udpsock) {
1015 				lck_mtx_unlock(&nfsd_mutex);
1016 				nfsrv_slpfree(slp);
1017 				mbuf_freem(mynam);
1018 				return EEXIST;
1019 			}
1020 			nfsrv_udpsock = slp;
1021 		}
1022 		if (sodomain == AF_INET6) {
1023 			/* There should be only one UDP/IPv6 socket */
1024 			if (nfsrv_udp6sock) {
1025 				lck_mtx_unlock(&nfsd_mutex);
1026 				nfsrv_slpfree(slp);
1027 				mbuf_freem(mynam);
1028 				return EEXIST;
1029 			}
1030 			nfsrv_udp6sock = slp;
1031 		}
1032 	}
1033 
1034 	/* add the socket to the list */
1035 	first = TAILQ_EMPTY(&nfsrv_socklist);
1036 	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
1037 	if (sotype == SOCK_STREAM) {
1038 		nfsrv_sock_tcp_cnt++;
1039 		if (nfsrv_sock_idle_timeout < 0) {
1040 			nfsrv_sock_idle_timeout = 0;
1041 		}
1042 		if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
1043 			nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
1044 		}
1045 		/*
1046 		 * Possibly start or stop the idle timer. We only start the idle timer when
1047 		 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
1048 		 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
1049 		 * the number of connections.
1050 		 */
1051 		if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
1052 			if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1053 				if (nfsrv_idlesock_timer_on) {
1054 					thread_call_cancel(nfsrv_idlesock_timer_call);
1055 					nfsrv_idlesock_timer_on = 0;
1056 				}
1057 			} else {
1058 				struct nfsrv_sock *old_slp;
1059 				struct timeval now;
1060 				microuptime(&now);
1061 				time_t time_to_wait = nfsrv_sock_idle_timeout;
1062 				/*
1063 				 * Get the oldest tcp socket and calculate the
1064 				 * earliest time for the next idle timer to fire
1065 				 * based on the possibly updated nfsrv_sock_idle_timeout
1066 				 */
1067 				TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
1068 					if (old_slp->ns_sotype == SOCK_STREAM) {
1069 						time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
1070 						if (time_to_wait < 1) {
1071 							time_to_wait = 1;
1072 						}
1073 						break;
1074 					}
1075 				}
1076 				/*
1077 				 * If we have a timer scheduled, but if its going to fire too late,
1078 				 * turn it off.
1079 				 */
1080 				if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
1081 					thread_call_cancel(nfsrv_idlesock_timer_call);
1082 					nfsrv_idlesock_timer_on = 0;
1083 				}
1084 				/* Schedule the idle thread if it isn't already */
1085 				if (!nfsrv_idlesock_timer_on) {
1086 					nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1087 					nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1088 				}
1089 			}
1090 		}
1091 	}
1092 
1093 	sock_retain(so); /* grab a retain count on the socket */
1094 	slp->ns_so = so;
1095 	slp->ns_sotype = sotype;
1096 	slp->ns_nam = mynam;
1097 	slp->ns_sobufsize = sobufsize;
1098 
1099 	/* set up the socket up-call */
1100 	nfsrv_uc_addsock(slp, first);
1101 
1102 	/* mark that the socket is not in the nfsrv_sockwg list */
1103 	slp->ns_wgq.tqe_next = SLPNOLIST;
1104 
1105 	slp->ns_flag = SLP_VALID | SLP_NEEDQ;
1106 
1107 	nfsrv_wakenfsd(slp);
1108 	lck_mtx_unlock(&nfsd_mutex);
1109 
1110 	return 0;
1111 }
1112 
1113 /*
1114  * nfssvc_nfsd()
1115  *
1116  * nfsd theory of operation:
1117  *
1118  * The first nfsd thread stays in user mode accepting new TCP connections
1119  * which are then added via the "addsock" call.  The rest of the nfsd threads
1120  * simply call into the kernel and remain there in a loop handling NFS
1121  * requests until killed by a signal.
1122  *
1123  * There's a list of nfsd threads (nfsd_head).
1124  * There's an nfsd queue that contains only those nfsds that are
1125  *   waiting for work to do (nfsd_queue).
1126  *
1127  * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
1128  *   managing the work on the sockets:
1129  *   nfsrv_sockwait - sockets w/new data waiting to be worked on
1130  *   nfsrv_sockwork - sockets being worked on which may have more work to do
1131  *   nfsrv_sockwg -- sockets which have pending write gather data
1132  * When a socket receives data, if it is not currently queued, it
1133  *   will be placed at the end of the "wait" queue.
1134  * Whenever a socket needs servicing we make sure it is queued and
1135  *   wake up a waiting nfsd (if there is one).
1136  *
1137  * nfsds will service at most 8 requests from the same socket before
1138  *   defecting to work on another socket.
1139  * nfsds will defect immediately if there are any sockets in the "wait" queue
1140  * nfsds looking for a socket to work on check the "wait" queue first and
1141  *   then check the "work" queue.
1142  * When an nfsd starts working on a socket, it removes it from the head of
1143  *   the queue it's currently on and moves it to the end of the "work" queue.
1144  * When nfsds are checking the queues for work, any sockets found not to
1145  *   have any work are simply dropped from the queue.
1146  *
 * Per-thread bookkeeping:
 *   Each caller allocates its own struct nfsd, links it on nfsd_head and
 *   bumps nfsd_thread_count under nfsd_mutex; the first thread in also
 *   initializes the server request cache.  On the way out (the "done"
 *   label, entered with nfsd_mutex held) the nfsd is unlinked and freed,
 *   and the thread that drops nfsd_thread_count to zero calls
 *   nfsrv_cleanup().
 *
 * Returns the error that ended the service loop: EINTR when
 *   nfsd_thread_max <= 0 (server shutting down), 0 when this thread is
 *   surplus (nfsd_thread_count > nfsd_thread_max) and idle, the msleep()
 *   error when the wait for work is interrupted, or EINTR/ERESTART from
 *   sending a reply.
 *
1147  */
1148 int
nfssvc_nfsd(void)1149 nfssvc_nfsd(void)
1150 {
1151 	mbuf_t m, mrep = NULL;
1152 	struct nfsrv_sock *slp;
1153 	struct nfsd *nfsd;
1154 	struct nfsrv_descript *nd = NULL;
1155 	int error = 0, cacherep, writes_todo;
1156 	int siz, procrastinate, opcnt = 0;
1157 	time_t cur_usec;
1158 	struct timeval now;
1159 	struct vfs_context context;
1160 	struct timespec to;
1161 
1162 #ifndef nolint
1163 	cacherep = RC_DOIT;
1164 	writes_todo = 0;
1165 #endif
1166 
	/* register this thread as an nfsd */
1167 	nfsd = kalloc_type(struct nfsd, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1168 	lck_mtx_lock(&nfsd_mutex);
1169 	if (nfsd_thread_count++ == 0) {
1170 		nfsrv_initcache();              /* Init the server request cache */
1171 	}
1172 	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
1173 	lck_mtx_unlock(&nfsd_mutex);
1174 
1175 	context.vc_thread = current_thread();
1176 
1177 	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
1178 	to.tv_sec = 5;
1179 	to.tv_nsec = 0;
1180 
1181 	/*
1182 	 * Loop getting rpc requests until SIGKILL.
1183 	 */
1184 	for (;;) {
1185 		if (nfsd_thread_max <= 0) {
1186 			/* NFS server shutting down, get out ASAP */
1187 			error = EINTR;
1188 			slp = nfsd->nfsd_slp;
1189 		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
1190 			/* already have some work to do */
1191 			error = 0;
1192 			slp = nfsd->nfsd_slp;
1193 		} else {
1194 			/* need to find work to do */
1195 			error = 0;
1196 			lck_mtx_lock(&nfsd_mutex);
			/* sleep until this nfsd is handed a socket or one of the queues is non-empty */
1197 			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
1198 				if (nfsd_thread_count > nfsd_thread_max) {
1199 					/*
1200 					 * If we have no socket and there are more
1201 					 * nfsd threads than configured, let's exit.
1202 					 */
1203 					error = 0;
					/* nfsd_mutex is still held, as the code at "done" expects */
1204 					goto done;
1205 				}
1206 				nfsd->nfsd_flag |= NFSD_WAITING;
1207 				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
1208 				error = msleep(nfsd, &nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
1209 				if (error) {
					/* nobody dequeued us before the sleep ended, so take ourselves off nfsd_queue */
1210 					if (nfsd->nfsd_flag & NFSD_WAITING) {
1211 						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
1212 						nfsd->nfsd_flag &= ~NFSD_WAITING;
1213 					}
					/* presumably EWOULDBLOCK is the 5 second timeout set up in "to"; just re-check for work */
1214 					if (error == EWOULDBLOCK) {
1215 						continue;
1216 					}
					/* NOTE(review): relies on msleep() returning with nfsd_mutex re-held - confirm */
1217 					goto done;
1218 				}
1219 			}
1220 			slp = nfsd->nfsd_slp;
1221 			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
1222 				/* look for a socket to work on in the wait queue */
1223 				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
1224 					lck_rw_lock_exclusive(&slp->ns_rwlock);
1225 					/* remove from the head of the queue */
1226 					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1227 					slp->ns_flag &= ~SLP_WAITQ;
					/* leaving the loop here keeps slp->ns_rwlock held; it is released below */
1228 					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
1229 						break;
1230 					}
1231 					/* nothing to do, so skip this socket */
1232 					lck_rw_done(&slp->ns_rwlock);
1233 				}
1234 			}
1235 			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
1236 				/* look for a socket to work on in the work queue */
1237 				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
1238 					lck_rw_lock_exclusive(&slp->ns_rwlock);
1239 					/* remove from the head of the queue */
1240 					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1241 					slp->ns_flag &= ~SLP_WORKQ;
					/* leaving the loop here keeps slp->ns_rwlock held; it is released below */
1242 					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
1243 						break;
1244 					}
1245 					/* nothing to do, so skip this socket */
1246 					lck_rw_done(&slp->ns_rwlock);
1247 				}
1248 			}
1249 			if (!nfsd->nfsd_slp && slp) {
1250 				/* we found a socket to work on, grab a reference */
1251 				slp->ns_sref++;
1252 				microuptime(&now);
1253 				slp->ns_timestamp = now.tv_sec;
1254 				/* We keep the socket list in least recently used order for reaping idle sockets */
1255 				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1256 				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
1257 				nfsd->nfsd_slp = slp;
1258 				opcnt = 0;
1259 				/* and put it at the back of the work queue */
1260 				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
1261 				slp->ns_flag |= SLP_WORKQ;
1262 				lck_rw_done(&slp->ns_rwlock);
1263 			}
1264 			lck_mtx_unlock(&nfsd_mutex);
			/* neither queue had a socket with work to do; go around again */
1265 			if (!slp) {
1266 				continue;
1267 			}
1268 			lck_rw_lock_exclusive(&slp->ns_rwlock);
1269 			if (slp->ns_flag & SLP_VALID) {
				/* pull pending data off the socket unless a disconnect has been flagged */
1270 				if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
1271 					slp->ns_flag &= ~SLP_NEEDQ;
1272 					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
1273 				}
1274 				if (slp->ns_flag & SLP_DISCONN) {
1275 					nfsrv_zapsock(slp);
1276 				}
1277 				error = nfsrv_dorec(slp, nfsd, &nd);
1278 				if (error == EINVAL) {  // RPCSEC_GSS drop
1279 					if (slp->ns_sotype == SOCK_STREAM) {
1280 						nfsrv_zapsock(slp); // drop connection
1281 					}
1282 				}
1283 				writes_todo = 0;
				/*
				 * No request was obtained, but gathered writes may be due:
				 * if the write-gather time has passed, clear the error and
				 * run the RC_DOIT path with writes_todo set.
				 */
1284 				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
1285 					microuptime(&now);
1286 					cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
1287 					if (slp->ns_wgtime <= cur_usec) {
1288 						error = 0;
1289 						cacherep = RC_DOIT;
1290 						writes_todo = 1;
1291 					}
1292 					slp->ns_flag &= ~SLP_DOWRITES;
1293 				}
1294 				nfsd->nfsd_flag |= NFSD_REQINPROG;
1295 			}
1296 			lck_rw_done(&slp->ns_rwlock);
1297 		}
		/*
		 * On error, or if the socket is no longer valid: free any request
		 * descriptor, detach from the socket and drop our reference on it.
		 * Leave the loop only if the server is shutting down.
		 */
1298 		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1299 			if (nd) {
1300 				nfsm_chain_cleanup(&nd->nd_nmreq);
1301 				if (nd->nd_nam2) {
1302 					mbuf_freem(nd->nd_nam2);
1303 				}
1304 				if (IS_VALID_CRED(nd->nd_cr)) {
1305 					kauth_cred_unref(&nd->nd_cr);
1306 				}
1307 				if (nd->nd_gss_context) {
1308 					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1309 				}
1310 				NFS_ZFREE(nfsrv_descript_zone, nd);
1311 			}
1312 			nfsd->nfsd_slp = NULL;
1313 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1314 			if (slp) {
1315 				nfsrv_slpderef(slp);
1316 			}
1317 			if (nfsd_thread_max <= 0) {
1318 				break;
1319 			}
1320 			continue;
1321 		}
1322 		if (nd) {
1323 			microuptime(&nd->nd_starttime);
			/* use the per-request address if there is one, else the socket's address */
1324 			if (nd->nd_nam2) {
1325 				nd->nd_nam = nd->nd_nam2;
1326 			} else {
1327 				nd->nd_nam = slp->ns_nam;
1328 			}
1329 
1330 			cacherep = nfsrv_getcache(nd, slp, &mrep);
1331 
1332 			if (nfsrv_require_resv_port) {
1333 				/* Check if source port is a reserved port */
1334 				in_port_t port = 0;
1335 				struct sockaddr *saddr = mbuf_data(nd->nd_nam);
1336 
1337 				if (saddr->sa_family == AF_INET) {
1338 					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1339 				} else if (saddr->sa_family == AF_INET6) {
1340 					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
1341 				}
				/* non-reserved source port: replace the request with a NOOP carrying an AUTH_TOOWEAK status */
1342 				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1343 					nd->nd_procnum = NFSPROC_NOOP;
1344 					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1345 					cacherep = RC_DOIT;
1346 				}
1347 			}
1348 		}
1349 
1350 		/*
1351 		 * Loop to get all the write RPC replies that have been
1352 		 * gathered together.
1353 		 */
1354 		do {
1355 			switch (cacherep) {
1356 			case RC_DOIT:
1357 				if (nd && (nd->nd_vers == NFS_VER3)) {
1358 					procrastinate = nfsrv_wg_delay_v3;
1359 				} else {
1360 					procrastinate = nfsrv_wg_delay;
1361 				}
				/* the export table read lock is held across the procedure call */
1362 				lck_rw_lock_shared(&nfsrv_export_rwlock);
1363 				context.vc_ucred = NULL;
1364 				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
1365 					error = nfsrv_writegather(&nd, slp, &context, &mrep);
1366 				} else {
1367 					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1368 				}
1369 				lck_rw_done(&nfsrv_export_rwlock);
1370 				if (mrep == NULL) {
1371 					/*
1372 					 * If this is a stream socket and we are not going
1373 					 * to send a reply we better close the connection
1374 					 * so the client doesn't hang.
1375 					 */
1376 					if (error && slp->ns_sotype == SOCK_STREAM) {
1377 						lck_rw_lock_exclusive(&slp->ns_rwlock);
1378 						nfsrv_zapsock(slp);
1379 						lck_rw_done(&slp->ns_rwlock);
1380 						printf("NFS server: NULL reply from proc = %d error = %d\n",
1381 						    nd->nd_procnum, error);
1382 					}
1383 					break;
1384 				}
1385 				if (error) {
1386 					OSAddAtomic64(1, &nfsrvstats.srv_errs);
1387 					nfsrv_updatecache(nd, FALSE, mrep);
1388 					if (nd->nd_nam2) {
1389 						mbuf_freem(nd->nd_nam2);
1390 						nd->nd_nam2 = NULL;
1391 					}
1392 					break;
1393 				}
1394 				OSAddAtomic64(1, &nfsrvstats.srvrpccntv3[nd->nd_procnum]);
1395 				nfsrv_updatecache(nd, TRUE, mrep);
				/* success: fall into RC_REPLY to send the reply in mrep */
1396 				OS_FALLTHROUGH;
1397 
1398 			case RC_REPLY:
1399 				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1400 					/*
1401 					 * Need to checksum or encrypt the reply
1402 					 */
1403 					error = nfs_gss_svc_protect_reply(nd, mrep);
1404 					if (error) {
1405 						mbuf_freem(mrep);
1406 						break;
1407 					}
1408 				}
1409 
1410 				/*
1411 				 * Get the total size of the reply
1412 				 */
1413 				m = mrep;
1414 				siz = 0;
1415 				while (m) {
1416 					siz += mbuf_len(m);
1417 					m = mbuf_next(m);
1418 				}
1419 				if (siz <= 0 || siz > NFS_MAXPACKET) {
1420 					printf("mbuf siz=%d\n", siz);
1421 					panic("Bad nfs svc reply");
1422 				}
1423 				m = mrep;
1424 				mbuf_pkthdr_setlen(m, siz);
1425 				error = mbuf_pkthdr_setrcvif(m, NULL);
1426 				if (error) {
1427 					panic("nfsd setrcvif failed: %d", error);
1428 				}
1429 				/*
1430 				 * For stream protocols, prepend a Sun RPC
1431 				 * Record Mark.
1432 				 */
1433 				if (slp->ns_sotype == SOCK_STREAM) {
1434 					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1435 					if (!error) {
1436 						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1437 					}
1438 				}
				/* send the reply if the socket is still valid; otherwise free it */
1439 				if (!error) {
1440 					if (slp->ns_flag & SLP_VALID) {
1441 						error = nfsrv_send(slp, nd->nd_nam2, m);
1442 					} else {
1443 						error = EPIPE;
1444 						mbuf_freem(m);
1445 					}
1446 				} else {
1447 					mbuf_freem(m);
1448 				}
1449 				mrep = NULL;
1450 				if (nd->nd_nam2) {
1451 					mbuf_freem(nd->nd_nam2);
1452 					nd->nd_nam2 = NULL;
1453 				}
1454 				if (error == EPIPE) {
1455 					lck_rw_lock_exclusive(&slp->ns_rwlock);
1456 					nfsrv_zapsock(slp);
1457 					lck_rw_done(&slp->ns_rwlock);
1458 				}
				/* interrupted while replying: release everything and leave the service loop */
1459 				if (error == EINTR || error == ERESTART) {
1460 					nfsm_chain_cleanup(&nd->nd_nmreq);
1461 					if (IS_VALID_CRED(nd->nd_cr)) {
1462 						kauth_cred_unref(&nd->nd_cr);
1463 					}
1464 					if (nd->nd_gss_context) {
1465 						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1466 					}
1467 					NFS_ZFREE(nfsrv_descript_zone, nd);
1468 					nfsrv_slpderef(slp);
					/* the code at "done" expects nfsd_mutex to be held */
1469 					lck_mtx_lock(&nfsd_mutex);
1470 					goto done;
1471 				}
1472 				break;
1473 			case RC_DROPIT:
1474 				mbuf_freem(nd->nd_nam2);
1475 				nd->nd_nam2 = NULL;
1476 				break;
1477 			}
1478 			;
1479 			opcnt++;
			/* this request is finished; release its descriptor and everything hanging off it */
1480 			if (nd) {
1481 				nfsm_chain_cleanup(&nd->nd_nmreq);
1482 				if (nd->nd_nam2) {
1483 					mbuf_freem(nd->nd_nam2);
1484 				}
1485 				if (IS_VALID_CRED(nd->nd_cr)) {
1486 					kauth_cred_unref(&nd->nd_cr);
1487 				}
1488 				if (nd->nd_gss_context) {
1489 					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1490 				}
1491 				NFS_ZFREE(nfsrv_descript_zone, nd);
1492 			}
1493 
1494 			/*
1495 			 * Check to see if there are outstanding writes that
1496 			 * need to be serviced.
1497 			 */
1498 			writes_todo = 0;
1499 			if (slp->ns_wgtime) {
1500 				microuptime(&now);
1501 				cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
1502 				if (slp->ns_wgtime <= cur_usec) {
1503 					cacherep = RC_DOIT;
1504 					writes_todo = 1;
1505 				}
1506 			}
1507 		} while (writes_todo);
1508 
1509 		nd = NULL;
		/*
		 * Stay on this socket and fetch its next request only if no other
		 * socket is waiting and fewer than 8 requests have been handled
		 * since we picked it up.
		 */
1510 		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1511 			lck_rw_lock_exclusive(&slp->ns_rwlock);
1512 			error = nfsrv_dorec(slp, nfsd, &nd);
1513 			if (error == EINVAL) {  // RPCSEC_GSS drop
1514 				if (slp->ns_sotype == SOCK_STREAM) {
1515 					nfsrv_zapsock(slp); // drop connection
1516 				}
1517 			}
1518 			lck_rw_done(&slp->ns_rwlock);
1519 		}
1520 		if (!nd) {
1521 			/* drop our reference on the socket */
1522 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1523 			nfsd->nfsd_slp = NULL;
1524 			nfsrv_slpderef(slp);
1525 		}
1526 	}
1527 	lck_mtx_lock(&nfsd_mutex);
1528 done:
	/* nfsd_mutex is held here: unregister this nfsd; the last one out cleans up */
1529 	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
1530 	kfree_type(struct nfsd, nfsd);
1531 	if (--nfsd_thread_count == 0) {
1532 		nfsrv_cleanup();
1533 	}
1534 	lck_mtx_unlock(&nfsd_mutex);
1535 	return error;
1536 }
1537 
1538 int
nfssvc_export(user_addr_t argp)1539 nfssvc_export(user_addr_t argp)
1540 {
1541 	int error = 0, is_64bit;
1542 	struct user_nfs_export_args unxa;
1543 	vfs_context_t ctx = vfs_context_current();
1544 
1545 	is_64bit = vfs_context_is64bit(ctx);
1546 
1547 	/* copy in pointers to path and export args */
1548 	if (is_64bit) {
1549 		error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1550 	} else {
1551 		struct nfs_export_args tnxa;
1552 		error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1553 		if (error == 0) {
1554 			/* munge into LP64 version of nfs_export_args structure */
1555 			unxa.nxa_fsid = tnxa.nxa_fsid;
1556 			unxa.nxa_expid = tnxa.nxa_expid;
1557 			unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1558 			unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1559 			unxa.nxa_flags = tnxa.nxa_flags;
1560 			unxa.nxa_netcount = tnxa.nxa_netcount;
1561 			unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1562 		}
1563 	}
1564 	if (error) {
1565 		return error;
1566 	}
1567 
1568 	error = nfsrv_export(&unxa, ctx);
1569 
1570 	return error;
1571 }
1572 
1573 int
nfssvc_exportstats(proc_t p,user_addr_t argp)1574 nfssvc_exportstats(proc_t p, user_addr_t argp)
1575 {
1576 	int error = 0;
1577 	uint pos;
1578 	struct nfs_exportfs *nxfs;
1579 	struct nfs_export *nx;
1580 	struct nfs_export_stat_desc stat_desc = {};
1581 	struct nfs_export_stat_rec statrec;
1582 	uint numExports, totlen, count;
1583 	size_t numRecs;
1584 	user_addr_t oldp, newlenp;
1585 	user_size_t oldlen, newlen;
1586 	struct user_iovec iov[2];
1587 
1588 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1589 	if (error) {
1590 		return error;
1591 	}
1592 
1593 	oldp = iov[0].iov_base;
1594 	oldlen = iov[0].iov_len;
1595 	newlenp = iov[1].iov_base;
1596 	newlen = iov[1].iov_len;
1597 
1598 	/* setup export stat descriptor */
1599 	stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
1600 
1601 	if (!nfsrv_is_initialized()) {
1602 		stat_desc.rec_count = 0;
1603 		if (oldp && (oldlen >= sizeof(struct nfs_export_stat_desc))) {
1604 			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1605 		}
1606 		size_t stat_desc_size = sizeof(struct nfs_export_stat_desc);
1607 		if (!error && newlenp && newlen >= sizeof(stat_desc_size)) {
1608 			error = copyout(&stat_desc_size, newlenp, sizeof(stat_desc_size));
1609 		}
1610 		return error;
1611 	}
1612 
1613 	/* Count the number of exported directories */
1614 	lck_rw_lock_shared(&nfsrv_export_rwlock);
1615 	numExports = 0;
1616 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
1617 	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
1618 	numExports += 1;
1619 
1620 	/* update stat descriptor's export record count */
1621 	stat_desc.rec_count = numExports;
1622 
1623 	/* calculate total size of required buffer */
1624 	totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
1625 
1626 	/* Check caller's buffer */
1627 	if (oldp == 0 || newlenp == 0) {
1628 		lck_rw_done(&nfsrv_export_rwlock);
1629 		/* indicate required buffer len */
1630 		if (newlenp && newlen >= sizeof(totlen)) {
1631 			error = copyout(&totlen, newlenp, sizeof(totlen));
1632 		}
1633 		return error;
1634 	}
1635 
1636 	/* We require the caller's buffer to be at least large enough to hold the descriptor */
1637 	if (oldlen < sizeof(struct nfs_export_stat_desc) || newlen < sizeof(totlen)) {
1638 		lck_rw_done(&nfsrv_export_rwlock);
1639 		/* indicate required buffer len */
1640 		if (newlenp && newlen >= sizeof(totlen)) {
1641 			(void)copyout(&totlen, newlenp, sizeof(totlen));
1642 		}
1643 		return ENOMEM;
1644 	}
1645 
1646 	/* indicate required buffer len */
1647 	error = copyout(&totlen, newlenp, sizeof(totlen));
1648 	if (error) {
1649 		lck_rw_done(&nfsrv_export_rwlock);
1650 		return error;
1651 	}
1652 
1653 	/* check if export table is empty */
1654 	if (!numExports) {
1655 		lck_rw_done(&nfsrv_export_rwlock);
1656 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1657 		return error;
1658 	}
1659 
1660 	/* calculate how many actual export stat records fit into caller's buffer */
1661 	numRecs = (totlen - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
1662 
1663 	if (!numRecs) {
1664 		/* caller's buffer can only accomodate descriptor */
1665 		lck_rw_done(&nfsrv_export_rwlock);
1666 		stat_desc.rec_count = 0;
1667 		error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1668 		return error;
1669 	}
1670 
1671 	/* adjust to actual number of records to copyout to caller's buffer */
1672 	if (numRecs > numExports) {
1673 		numRecs = numExports;
1674 	}
1675 
1676 	/* set actual number of records we are returning */
1677 	stat_desc.rec_count = numRecs;
1678 
1679 	/* first copy out the stat descriptor */
1680 	pos = 0;
1681 	error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
1682 	if (error) {
1683 		lck_rw_done(&nfsrv_export_rwlock);
1684 		return error;
1685 	}
1686 	pos += sizeof(struct nfs_export_stat_desc);
1687 
1688 	/* Loop through exported directories */
1689 	count = 0;
1690 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
1691 		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
1692 			if (count >= numRecs) {
1693 				break;
1694 			}
1695 
1696 			/* build exported filesystem path */
1697 			memset(statrec.path, 0, sizeof(statrec.path));
1698 			snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
1699 			    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
1700 			    nx->nx_path);
1701 
1702 			/* build the 64-bit export stat counters */
1703 			statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
1704 			    nx->nx_stats.ops.lo;
1705 			statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
1706 			    nx->nx_stats.bytes_read.lo;
1707 			statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
1708 			    nx->nx_stats.bytes_written.lo;
1709 			error = copyout(&statrec, oldp + pos, sizeof(statrec));
1710 			if (error) {
1711 				lck_rw_done(&nfsrv_export_rwlock);
1712 				return error;
1713 			}
1714 			/* advance buffer position */
1715 			pos += sizeof(statrec);
1716 		}
1717 	}
1718 	lck_rw_done(&nfsrv_export_rwlock);
1719 
1720 	return error;
1721 }
1722 
1723 int
nfssvc_userstats(proc_t p,user_addr_t argp)1724 nfssvc_userstats(proc_t p, user_addr_t argp)
1725 {
1726 	int error = 0;
1727 	struct nfs_exportfs *nxfs;
1728 	struct nfs_export *nx;
1729 	struct nfs_active_user_list *ulist;
1730 	struct nfs_user_stat_desc ustat_desc = {};
1731 	struct nfs_user_stat_node *unode, *unode_next;
1732 	struct nfs_user_stat_user_rec ustat_rec;
1733 	struct nfs_user_stat_path_rec upath_rec;
1734 	uint bytes_total, recs_copied, pos;
1735 	size_t bytes_avail;
1736 	user_addr_t oldp, newlenp;
1737 	user_size_t oldlen, newlen;
1738 	struct user_iovec iov[2];
1739 
1740 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1741 	if (error) {
1742 		return error;
1743 	}
1744 
1745 	oldp = iov[0].iov_base;
1746 	oldlen = iov[0].iov_len;
1747 	newlenp = iov[1].iov_base;
1748 	newlen = iov[1].iov_len;
1749 
1750 	/* init structures used for copying out of kernel */
1751 	ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
1752 	ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
1753 	upath_rec.rec_type = NFS_USER_STAT_PATH_REC;
1754 
1755 	/* initialize counters */
1756 	bytes_total = sizeof(struct nfs_user_stat_desc);
1757 	bytes_avail  = oldlen;
1758 	recs_copied = 0;
1759 
1760 	if (!nfsrv_is_initialized()) { /* NFS server not initialized, so no stats */
1761 		goto ustat_skip;
1762 	}
1763 
1764 	/* reclaim old expired user nodes */
1765 	nfsrv_active_user_list_reclaim();
1766 
1767 	/* reserve space for the buffer descriptor */
1768 	if (bytes_avail >= sizeof(struct nfs_user_stat_desc)) {
1769 		bytes_avail -= sizeof(struct nfs_user_stat_desc);
1770 	} else {
1771 		bytes_avail = 0;
1772 	}
1773 
1774 	/* put buffer position past the buffer descriptor */
1775 	pos = sizeof(struct nfs_user_stat_desc);
1776 
1777 	/* Loop through exported directories */
1778 	lck_rw_lock_shared(&nfsrv_export_rwlock);
1779 	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
1780 		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
1781 			/* copy out path */
1782 			if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
1783 				memset(upath_rec.path, 0, sizeof(upath_rec.path));
1784 				snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
1785 				    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
1786 				    nx->nx_path);
1787 
1788 				error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
1789 				if (error) {
1790 					/* punt */
1791 					goto ustat_done;
1792 				}
1793 
1794 				pos += sizeof(struct nfs_user_stat_path_rec);
1795 				bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
1796 				recs_copied++;
1797 			} else {
1798 				/* Caller's buffer is exhausted */
1799 				bytes_avail = 0;
1800 			}
1801 
1802 			bytes_total += sizeof(struct nfs_user_stat_path_rec);
1803 
1804 			/* Scan through all user nodes of this export */
1805 			ulist = &nx->nx_user_list;
1806 			lck_mtx_lock(&ulist->user_mutex);
1807 			for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
1808 				unode_next = TAILQ_NEXT(unode, lru_link);
1809 
1810 				/* copy out node if there is space */
1811 				if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
1812 					/* prepare a user stat rec for copying out */
1813 					ustat_rec.uid = unode->uid;
1814 					memset(&ustat_rec.sock, 0, sizeof(ustat_rec.sock));
1815 					bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
1816 					ustat_rec.ops = unode->ops;
1817 					ustat_rec.bytes_read = unode->bytes_read;
1818 					ustat_rec.bytes_written = unode->bytes_written;
1819 					ustat_rec.tm_start = unode->tm_start;
1820 					ustat_rec.tm_last = unode->tm_last;
1821 
1822 					error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));
1823 
1824 					if (error) {
1825 						/* punt */
1826 						lck_mtx_unlock(&ulist->user_mutex);
1827 						goto ustat_done;
1828 					}
1829 
1830 					pos += sizeof(struct nfs_user_stat_user_rec);
1831 					bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
1832 					recs_copied++;
1833 				} else {
1834 					/* Caller's buffer is exhausted */
1835 					bytes_avail = 0;
1836 				}
1837 				bytes_total += sizeof(struct nfs_user_stat_user_rec);
1838 			}
1839 			/* can unlock this export's list now */
1840 			lck_mtx_unlock(&ulist->user_mutex);
1841 		}
1842 	}
1843 
1844 ustat_done:
1845 	/* unlock the export table */
1846 	lck_rw_done(&nfsrv_export_rwlock);
1847 
1848 ustat_skip:
1849 	/* indicate number of actual records copied */
1850 	ustat_desc.rec_count = recs_copied;
1851 
1852 	if (!error) {
1853 		/* check if there was enough room for the buffer descriptor */
1854 		if (oldlen >= sizeof(struct nfs_user_stat_desc)) {
1855 			error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
1856 		} else {
1857 			error = ENOMEM;
1858 		}
1859 
1860 		/* always indicate required buffer size */
1861 		if (!error && newlenp && newlen >= sizeof(bytes_total)) {
1862 			error = copyout(&bytes_total, newlenp, sizeof(bytes_total));
1863 		}
1864 	}
1865 	return error;
1866 }
1867 
1868 int
nfssvc_usercount(proc_t p,user_addr_t argp)1869 nfssvc_usercount(proc_t p, user_addr_t argp)
1870 {
1871 	int error;
1872 	user_addr_t oldp, newlenp;
1873 	user_size_t oldlen, newlen;
1874 	struct user_iovec iov[2];
1875 	size_t stat_size = sizeof(nfsrv_user_stat_node_count);
1876 
1877 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1878 	if (error) {
1879 		return error;
1880 	}
1881 
1882 	oldp = iov[0].iov_base;
1883 	oldlen = iov[0].iov_len;
1884 	newlenp = iov[1].iov_base;
1885 	newlen = iov[1].iov_len;
1886 
1887 	if (!oldp) {
1888 		if (newlenp && newlen >= sizeof(stat_size)) {
1889 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1890 		}
1891 		return error;
1892 	}
1893 
1894 	if (oldlen < stat_size) {
1895 		if (newlenp && newlen >= sizeof(stat_size)) {
1896 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1897 		}
1898 		return ENOMEM;
1899 	}
1900 
1901 	if (nfsrv_is_initialized()) {
1902 		/* reclaim old expired user nodes */
1903 		nfsrv_active_user_list_reclaim();
1904 	}
1905 
1906 	error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
1907 
1908 	return error;
1909 }
1910 
1911 int
nfssvc_zerostats(void)1912 nfssvc_zerostats(void)
1913 {
1914 	bzero(&nfsrvstats, sizeof nfsrvstats);
1915 	return 0;
1916 }
1917 
1918 int
nfssvc_srvstats(proc_t p,user_addr_t argp)1919 nfssvc_srvstats(proc_t p, user_addr_t argp)
1920 {
1921 	int error;
1922 	user_addr_t oldp, newlenp;
1923 	user_size_t oldlen, newlen;
1924 	struct user_iovec iov[2];
1925 	size_t stat_size = sizeof(nfsrvstats);
1926 
1927 	error = copyin_user_iovec_array(argp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 2, iov);
1928 	if (error) {
1929 		return error;
1930 	}
1931 
1932 	oldp = iov[0].iov_base;
1933 	oldlen = iov[0].iov_len;
1934 	newlenp = iov[1].iov_base;
1935 	newlen = iov[1].iov_len;
1936 
1937 	if (!oldp) {
1938 		if (newlenp && newlen >= sizeof(stat_size)) {
1939 			error = copyout(&stat_size, newlenp, sizeof(stat_size));
1940 		}
1941 		return error;
1942 	}
1943 
1944 	if (oldlen < stat_size) {
1945 		if (newlenp && newlen >= sizeof(stat_size)) {
1946 			(void)copyout(&stat_size, newlenp, sizeof(stat_size));
1947 		}
1948 		return ENOMEM;
1949 	}
1950 
1951 	error = copyout(&nfsrvstats, oldp, stat_size);
1952 	if (error) {
1953 		return error;
1954 	}
1955 
1956 	return 0;
1957 }
1958 
1959 /*
1960  * Shut down a socket associated with an nfsrv_sock structure.
1961  * Should be called with the send lock set, if required.
1962  * The trick here is to increment the sref at the start, so that the nfsds
1963  * will stop using it and clear ns_flag at the end so that it will not be
1964  * reassigned during cleanup.
1965  */
1966 void
nfsrv_zapsock(struct nfsrv_sock * slp)1967 nfsrv_zapsock(struct nfsrv_sock *slp)
1968 {
1969 	socket_t so;
1970 
1971 	if ((slp->ns_flag & SLP_VALID) == 0) {
1972 		return;
1973 	}
1974 	slp->ns_flag &= ~SLP_ALLFLAGS;
1975 
1976 	so = slp->ns_so;
1977 	if (so == NULL) {
1978 		return;
1979 	}
1980 
1981 	sock_setupcall(so, NULL, NULL);
1982 	sock_shutdown(so, SHUT_RDWR);
1983 
1984 	/*
1985 	 * Remove from the up-call queue
1986 	 */
1987 	nfsrv_uc_dequeue(slp);
1988 }
1989 
1990 /*
1991  * cleanup and release a server socket structure.
1992  */
1993 void
nfsrv_slpfree(struct nfsrv_sock * slp)1994 nfsrv_slpfree(struct nfsrv_sock *slp)
1995 {
1996 	struct nfsrv_descript *nwp, *nnwp;
1997 
1998 	if (slp->ns_so) {
1999 		sock_release(slp->ns_so);
2000 		slp->ns_so = NULL;
2001 	}
2002 	if (slp->ns_nam) {
2003 		mbuf_free(slp->ns_nam);
2004 	}
2005 	if (slp->ns_raw) {
2006 		mbuf_freem(slp->ns_raw);
2007 	}
2008 	if (slp->ns_rec) {
2009 		mbuf_freem(slp->ns_rec);
2010 	}
2011 	if (slp->ns_frag) {
2012 		mbuf_freem(slp->ns_frag);
2013 	}
2014 	slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
2015 	slp->ns_reccnt = 0;
2016 
2017 	for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
2018 		nnwp = nwp->nd_tq.le_next;
2019 		LIST_REMOVE(nwp, nd_tq);
2020 		nfsm_chain_cleanup(&nwp->nd_nmreq);
2021 		if (nwp->nd_mrep) {
2022 			mbuf_freem(nwp->nd_mrep);
2023 		}
2024 		if (nwp->nd_nam2) {
2025 			mbuf_freem(nwp->nd_nam2);
2026 		}
2027 		if (IS_VALID_CRED(nwp->nd_cr)) {
2028 			kauth_cred_unref(&nwp->nd_cr);
2029 		}
2030 		if (nwp->nd_gss_context) {
2031 			nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
2032 		}
2033 		NFS_ZFREE(nfsrv_descript_zone, nwp);
2034 	}
2035 	LIST_INIT(&slp->ns_tq);
2036 
2037 	lck_rw_destroy(&slp->ns_rwlock, &nfsrv_slp_rwlock_group);
2038 	lck_mtx_destroy(&slp->ns_wgmutex, &nfsrv_slp_mutex_group);
2039 	kfree_type(struct nfsrv_sock, slp);
2040 }
2041 
2042 /*
2043  * Derefence a server socket structure. If it has no more references and
2044  * is no longer valid, you can throw it away.
2045  */
2046 static void
nfsrv_slpderef_locked(struct nfsrv_sock * slp)2047 nfsrv_slpderef_locked(struct nfsrv_sock *slp)
2048 {
2049 	lck_rw_lock_exclusive(&slp->ns_rwlock);
2050 	slp->ns_sref--;
2051 
2052 	if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
2053 		if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
2054 			/* remove socket from queue since there's no work */
2055 			if (slp->ns_flag & SLP_WAITQ) {
2056 				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
2057 			} else {
2058 				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
2059 			}
2060 			slp->ns_flag &= ~SLP_QUEUED;
2061 		}
2062 		lck_rw_done(&slp->ns_rwlock);
2063 		return;
2064 	}
2065 
2066 	/* This socket is no longer valid, so we'll get rid of it */
2067 
2068 	if (slp->ns_flag & SLP_QUEUED) {
2069 		if (slp->ns_flag & SLP_WAITQ) {
2070 			TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
2071 		} else {
2072 			TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
2073 		}
2074 		slp->ns_flag &= ~SLP_QUEUED;
2075 	}
2076 	lck_rw_done(&slp->ns_rwlock);
2077 
2078 	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
2079 	if (slp->ns_sotype == SOCK_STREAM) {
2080 		nfsrv_sock_tcp_cnt--;
2081 	}
2082 
2083 	/* now remove from the write gather socket list */
2084 	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
2085 		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
2086 		slp->ns_wgq.tqe_next = SLPNOLIST;
2087 	}
2088 	nfsrv_slpfree(slp);
2089 }
2090 
2091 void
nfsrv_slpderef(struct nfsrv_sock * slp)2092 nfsrv_slpderef(struct nfsrv_sock *slp)
2093 {
2094 	lck_mtx_lock(&nfsd_mutex);
2095 	nfsrv_slpderef_locked(slp);
2096 	lck_mtx_unlock(&nfsd_mutex);
2097 }
2098 
2099 /*
2100  * Check periodically for idle sockest if needed and
2101  * zap them.
2102  */
2103 void
nfsrv_idlesock_timer(__unused void * param0,__unused void * param1)2104 nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
2105 {
2106 	struct nfsrv_sock *slp, *tslp;
2107 	struct timeval now;
2108 	time_t time_to_wait = nfsrv_sock_idle_timeout;
2109 
2110 	microuptime(&now);
2111 	lck_mtx_lock(&nfsd_mutex);
2112 
2113 	/* Turn off the timer if we're suppose to and get out */
2114 	if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
2115 		nfsrv_sock_idle_timeout = 0;
2116 	}
2117 	if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
2118 		nfsrv_idlesock_timer_on = 0;
2119 		lck_mtx_unlock(&nfsd_mutex);
2120 		return;
2121 	}
2122 
2123 	TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
2124 		lck_rw_lock_exclusive(&slp->ns_rwlock);
2125 		/* Skip udp and referenced sockets */
2126 		if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
2127 			lck_rw_done(&slp->ns_rwlock);
2128 			continue;
2129 		}
2130 		/*
2131 		 * If this is the first non-referenced socket that hasn't idle out,
2132 		 * use its time stamp to calculate the earlist time in the future
2133 		 * to start the next invocation of the timer. Since the nfsrv_socklist
2134 		 * is sorted oldest access to newest. Once we find the first one,
2135 		 * we're done and break out of the loop.
2136 		 */
2137 		if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
2138 		    nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
2139 			time_to_wait -= now.tv_sec - slp->ns_timestamp;
2140 			if (time_to_wait < 1) {
2141 				time_to_wait = 1;
2142 			}
2143 			lck_rw_done(&slp->ns_rwlock);
2144 			break;
2145 		}
2146 		/*
2147 		 * Bump the ref count. nfsrv_slpderef below will destroy
2148 		 * the socket, since nfsrv_zapsock has closed it.
2149 		 */
2150 		slp->ns_sref++;
2151 		nfsrv_zapsock(slp);
2152 		lck_rw_done(&slp->ns_rwlock);
2153 		nfsrv_slpderef_locked(slp);
2154 	}
2155 
2156 	/* Start ourself back up */
2157 	nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
2158 	/* Remember when the next timer will fire for nfssvc_addsock. */
2159 	nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
2160 	lck_mtx_unlock(&nfsd_mutex);
2161 }
2162 
2163 /*
2164  * Clean up the data structures for the server.
2165  */
2166 void
nfsrv_cleanup(void)2167 nfsrv_cleanup(void)
2168 {
2169 	struct nfsrv_sock *slp, *nslp;
2170 	struct timeval now;
2171 #if CONFIG_FSE
2172 	struct nfsrv_fmod *fp, *nfp;
2173 	int i;
2174 #endif
2175 
2176 	microuptime(&now);
2177 	for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
2178 		nslp = TAILQ_NEXT(slp, ns_chain);
2179 		lck_rw_lock_exclusive(&slp->ns_rwlock);
2180 		slp->ns_sref++;
2181 		if (slp->ns_flag & SLP_VALID) {
2182 			nfsrv_zapsock(slp);
2183 		}
2184 		lck_rw_done(&slp->ns_rwlock);
2185 		nfsrv_slpderef_locked(slp);
2186 	}
2187 #
2188 #if CONFIG_FSE
2189 	/*
2190 	 * Flush pending file write fsevents
2191 	 */
2192 	lck_mtx_lock(&nfsrv_fmod_mutex);
2193 	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
2194 		for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
2195 			/*
2196 			 * Fire off the content modified fsevent for each
2197 			 * entry, remove it from the list, and free it.
2198 			 */
2199 			if (nfsrv_fsevents_enabled) {
2200 				fp->fm_context.vc_thread = current_thread();
2201 				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
2202 				    FSE_ARG_VNODE, fp->fm_vp,
2203 				    FSE_ARG_DONE);
2204 			}
2205 			vnode_put(fp->fm_vp);
2206 			kauth_cred_unref(&fp->fm_context.vc_ucred);
2207 			nfp = LIST_NEXT(fp, fm_link);
2208 			LIST_REMOVE(fp, fm_link);
2209 			kfree_type(struct nfsrv_fmod, fp);
2210 		}
2211 	}
2212 	nfsrv_fmod_pending = 0;
2213 	lck_mtx_unlock(&nfsrv_fmod_mutex);
2214 #endif
2215 
2216 	nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
2217 
2218 	nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
2219 
2220 	nfsrv_cleancache();     /* And clear out server cache */
2221 
2222 	nfsrv_udpsock = NULL;
2223 	nfsrv_udp6sock = NULL;
2224 }
2225 
2226 #endif /* CONFIG_NFS_SERVER */
2227