xref: /xnu-11215.1.10/bsd/nfs/nfs_socket.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1989, 1991, 1993, 1995
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Rick Macklem at The University of Guelph.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
65  * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
66  */
67 
68 #include <nfs/nfs_conf.h>
69 #if CONFIG_NFS_SERVER
70 
71 /*
72  * Socket operations for use by nfs
73  */
74 
75 #include <sys/systm.h>
76 #include <sys/kauth.h>
77 #include <sys/mount_internal.h>
78 #include <sys/kpi_mbuf.h>
79 #include <IOKit/IOLib.h>
80 
81 #include <netinet/in.h>
82 
83 #include <nfs/rpcv2.h>
84 #include <nfs/nfsproto.h>
85 #include <nfs/nfs.h>
86 #include <nfs/xdr_subs.h>
87 #include <nfs/nfsm_subs.h>
88 #include <nfs/nfs_gss.h>
89 
90 ZONE_DEFINE(nfsrv_descript_zone, "NFSV3 srvdesc",
91     sizeof(struct nfsrv_descript), ZC_NONE);
92 
93 int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
94 
95 uint32_t nfsrv_unprocessed_rpc_current = 0; /* Current bytes of unprocessed RPC records */
96 uint32_t nfsrv_unprocessed_rpc_max = (64 * 1024 * 1024); /* Max bytes of unprocessed RPC records - 64MB by default */
97 
98 int nfsrv_getstream(struct nfsrv_sock *, int);
99 int nfsrv_getreq(struct nfsrv_descript *);
100 extern int nfsv3_procid[NFS_NPROCS];
101 
102 #define NFS_TRYLOCK_MSEC_SLEEP 1
103 
104 const nfserr_info_t nfserrs_common[NFSERR_INFO_COMMON_SIZE] = {
105 	NFSERR_INFO_COMMON
106 };
107 
#ifndef ARRAY_SIZE
/*
 * Number of elements in the array A.  A must be a real array, not a pointer.
 * The argument is parenthesized before indexing so that any array-valued
 * expression can be passed safely.
 */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
#endif
111 
112 static int
is_error_in_range(const nfserr_info_t * arr,int arr_size,int error)113 is_error_in_range(const nfserr_info_t *arr, int arr_size, int error)
114 {
115 	if (arr_size == 0) {
116 		return 0;
117 	}
118 	return error >= arr[0].nei_error && error <= arr[arr_size - 1].nei_error;
119 }
120 
121 static void
nfsstat_update_nfserror(int error)122 nfsstat_update_nfserror(int error)
123 {
124 	if (is_error_in_range(nfserrs_common, ARRAY_SIZE(nfserrs_common), error)) {
125 		for (uint32_t i = 0; i < ARRAY_SIZE(nfserrs_common); i++) {
126 			if (error == nfserrs_common[i].nei_error) {
127 				nfsrvstats.nfs_errs.errs_common[nfserrs_common[i].nei_index]++;
128 				return;
129 			}
130 		}
131 	}
132 
133 	/* Unknown error */
134 	nfsrvstats.nfs_errs.errs_unknown++;
135 }
136 
137 /*
138  * compare two sockaddr structures
139  */
140 int
nfs_sockaddr_cmp(struct sockaddr * sa1,struct sockaddr * sa2)141 nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
142 {
143 	if (!sa1) {
144 		return -1;
145 	}
146 	if (!sa2) {
147 		return 1;
148 	}
149 	if (sa1->sa_family != sa2->sa_family) {
150 		return (sa1->sa_family < sa2->sa_family) ? -1 : 1;
151 	}
152 	if (sa1->sa_len != sa2->sa_len) {
153 		return (sa1->sa_len < sa2->sa_len) ? -1 : 1;
154 	}
155 	if (sa1->sa_family == AF_INET) {
156 		return bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
157 		           &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr));
158 	}
159 	if (sa1->sa_family == AF_INET6) {
160 		return bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
161 		           &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr));
162 	}
163 	return -1;
164 }
165 
166 /*
167  * Generate the rpc reply header
168  * siz arg. is used to decide if adding a cluster is worthwhile
169  */
170 int
nfsrv_rephead(struct nfsrv_descript * nd,__unused struct nfsrv_sock * slp,struct nfsm_chain * nmrepp,size_t siz)171 nfsrv_rephead(
172 	struct nfsrv_descript *nd,
173 	__unused struct nfsrv_sock *slp,
174 	struct nfsm_chain *nmrepp,
175 	size_t siz)
176 {
177 	mbuf_t mrep;
178 	u_int32_t *tl;
179 	struct nfsm_chain nmrep;
180 	int err, error, mappederr;
181 
182 	err = nd->nd_repstat;
183 	if (err && (nd->nd_vers == NFS_VER2)) {
184 		siz = 0;
185 	}
186 
187 	/*
188 	 * If this is a big reply, use a cluster else
189 	 * try and leave leading space for the lower level headers.
190 	 */
191 	siz += RPC_REPLYSIZ;
192 	if (siz >= nfs_mbuf_minclsize) {
193 		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
194 	} else {
195 		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
196 	}
197 	if (error) {
198 		/* unable to allocate packet */
199 		/* XXX should we keep statistics for these errors? */
200 		return error;
201 	}
202 	if (siz < nfs_mbuf_minclsize) {
203 		/* leave space for lower level headers */
204 		tl = mbuf_data(mrep);
205 		tl += 80 / sizeof(*tl);  /* XXX max_hdr? XXX */
206 		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
207 	}
208 	nfsm_chain_init(&nmrep, mrep);
209 	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
210 	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
211 	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
212 		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
213 		if (err & NFSERR_AUTHERR) {
214 			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
215 			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
216 		} else {
217 			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
218 			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
219 			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
220 		}
221 	} else {
222 		/* reply status */
223 		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
224 		if (nd->nd_gss_context != NULL) {
225 			/* RPCSEC_GSS verifier */
226 			error = nfs_gss_svc_verf_put(nd, &nmrep);
227 			if (error) {
228 				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
229 				goto done;
230 			}
231 		} else {
232 			/* RPCAUTH_NULL verifier */
233 			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
234 			nfsm_chain_add_32(error, &nmrep, 0);
235 		}
236 		/* accepted status */
237 		switch (err) {
238 		case EPROGUNAVAIL:
239 			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
240 			break;
241 		case EPROGMISMATCH:
242 			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
243 			/* XXX hard coded versions? */
244 			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
245 			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
246 			break;
247 		case EPROCUNAVAIL:
248 			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
249 			break;
250 		case EBADRPC:
251 			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
252 			break;
253 		default:
254 			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
255 			if (nd->nd_gss_context != NULL) {
256 				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
257 			}
258 			if (err != NFSERR_RETVOID) {
259 				mappederr = err ? nfsrv_errmap(nd, err) : 0;
260 				nfsm_chain_add_32(error, &nmrep, mappederr);
261 				nfsstat_update_nfserror(mappederr);
262 			}
263 			break;
264 		}
265 	}
266 
267 done:
268 	nfsm_chain_build_done(error, &nmrep);
269 	if (error) {
270 		/* error composing reply header */
271 		/* XXX should we keep statistics for these errors? */
272 		mbuf_freem(mrep);
273 		return error;
274 	}
275 
276 	*nmrepp = nmrep;
277 	if ((err != 0) && (err != NFSERR_RETVOID)) {
278 		OSAddAtomic64(1, &nfsrvstats.srvrpc_errs);
279 	}
280 	return 0;
281 }
282 
283 /*
284  * The nfs server send routine.
285  *
286  * - return EINTR or ERESTART if interrupted by a signal
287  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
288  * - do any cleanup required by recoverable socket errors (???)
289  */
290 int
nfsrv_send(struct nfsrv_sock * slp,mbuf_t nam,mbuf_t top)291 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
292 {
293 	int error;
294 	socket_t so = slp->ns_so;
295 	struct sockaddr *sendnam;
296 	struct msghdr msg;
297 
298 	bzero(&msg, sizeof(msg));
299 	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
300 		if ((sendnam = mbuf_data(nam))) {
301 			msg.msg_name = (caddr_t)sendnam;
302 			msg.msg_namelen = sendnam->sa_len;
303 		}
304 	}
305 	if (NFSRV_IS_DBG(NFSRV_FAC_SRV, 15)) {
306 		nfs_dump_mbuf(__func__, __LINE__, "nfsrv_send\n", top);
307 	}
308 	error = sock_sendmbuf(so, &msg, top, 0, NULL);
309 	if (!error) {
310 		return 0;
311 	}
312 	log(LOG_INFO, "nfsd send error %d\n", error);
313 
314 	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM)) {
315 		error = EPIPE;  /* zap TCP sockets if they time out on send */
316 	}
317 	/* Handle any recoverable (soft) socket errors here. (???) */
318 	if (error != EINTR && error != ERESTART && error != EIO &&
319 	    error != EWOULDBLOCK && error != EPIPE) {
320 		error = 0;
321 	}
322 
323 	return error;
324 }
325 
326 /*
327  * Socket upcall routine for the nfsd sockets.
328  * The caddr_t arg is a pointer to the "struct nfsrv_sock".
329  * Essentially do as much as possible non-blocking, else punt and it will
330  * be called with MBUF_WAITOK from an nfsd.
331  */
332 void
nfsrv_rcv(socket_t so,void * arg,int waitflag)333 nfsrv_rcv(socket_t so, void *arg, int waitflag)
334 {
335 	struct nfsrv_sock *slp = arg;
336 
337 	while (1) {
338 		if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID)) {
339 			return;
340 		}
341 		if (lck_rw_try_lock_exclusive(&slp->ns_rwlock)) {
342 			/* Exclusive lock acquired */
343 			break;
344 		}
345 		IOSleep(NFS_TRYLOCK_MSEC_SLEEP);
346 	}
347 
348 	nfsrv_rcv_locked(so, slp, waitflag);
349 	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
350 }
351 void
nfsrv_rcv_locked(socket_t so,struct nfsrv_sock * slp,int waitflag)352 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
353 {
354 	mbuf_t m, mp, mhck, m2;
355 	int ns_flag = 0, error;
356 	struct msghdr   msg;
357 	size_t bytes_read;
358 
359 	if ((slp->ns_flag & SLP_VALID) == 0) {
360 		if (waitflag == MBUF_DONTWAIT) {
361 			lck_rw_done(&slp->ns_rwlock);
362 		}
363 		return;
364 	}
365 
366 #ifdef notdef
367 	/*
368 	 * Define this to test for nfsds handling this under heavy load.
369 	 */
370 	if (waitflag == MBUF_DONTWAIT) {
371 		ns_flag = SLP_NEEDQ;
372 		goto dorecs;
373 	}
374 #endif
375 	if (slp->ns_sotype == SOCK_STREAM) {
376 		/*
377 		 * If there are already records on the queue, defer soreceive()
378 		 * to an(other) nfsd so that there is feedback to the TCP layer that
379 		 * the nfs servers are heavily loaded.
380 		 */
381 		if (slp->ns_rec) {
382 			ns_flag = SLP_NEEDQ;
383 			goto dorecs;
384 		}
385 
386 		/*
387 		 * Do soreceive().
388 		 */
389 		bytes_read = 1000000000;
390 		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
391 		if (error || mp == NULL) {
392 			if (error == EWOULDBLOCK) {
393 				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
394 			} else {
395 				ns_flag = SLP_DISCONN;
396 			}
397 			goto dorecs;
398 		}
399 		m = mp;
400 		if (slp->ns_rawend) {
401 			if ((error = mbuf_setnext(slp->ns_rawend, m))) {
402 				panic("nfsrv_rcv: mbuf_setnext failed %d", error);
403 			}
404 			slp->ns_cc += bytes_read;
405 		} else {
406 			slp->ns_raw = m;
407 			slp->ns_cc = bytes_read;
408 		}
409 		while ((m2 = mbuf_next(m))) {
410 			m = m2;
411 		}
412 		slp->ns_rawend = m;
413 
414 		/*
415 		 * Now try and parse record(s) out of the raw stream data.
416 		 */
417 		error = nfsrv_getstream(slp, waitflag);
418 		if (error) {
419 			if (error == EWOULDBLOCK) {
420 				ns_flag = SLP_NEEDQ;
421 			} else {
422 				ns_flag = SLP_DISCONN;
423 			}
424 		}
425 	} else {
426 		struct sockaddr_storage nam;
427 
428 		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
429 			/* already have max # RPC records queued on this socket */
430 			ns_flag = SLP_NEEDQ;
431 			goto dorecs;
432 		}
433 
434 		bzero(&msg, sizeof(msg));
435 		msg.msg_name = (caddr_t)&nam;
436 		msg.msg_namelen = sizeof(nam);
437 
438 		do {
439 			bytes_read = 1000000000;
440 			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
441 			if (mp) {
442 				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
443 					mbuf_setlen(mhck, nam.ss_len);
444 					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
445 					m = mhck;
446 					if (mbuf_setnext(m, mp)) {
447 						/* trouble... just drop it */
448 						printf("nfsrv_rcv: mbuf_setnext failed\n");
449 						mbuf_free(mhck);
450 						m = mp;
451 					}
452 				} else {
453 					m = mp;
454 				}
455 				if (slp->ns_recend) {
456 					mbuf_setnextpkt(slp->ns_recend, m);
457 				} else {
458 					slp->ns_rec = m;
459 					slp->ns_flag |= SLP_DOREC;
460 				}
461 				slp->ns_recend = m;
462 				mbuf_setnextpkt(m, NULL);
463 				slp->ns_reccnt++;
464 			}
465 		} while (mp);
466 	}
467 
468 	/*
469 	 * Now try and process the request records, non-blocking.
470 	 */
471 dorecs:
472 	if (ns_flag) {
473 		slp->ns_flag |= ns_flag;
474 	}
475 	if (waitflag == MBUF_DONTWAIT) {
476 		int wake = (slp->ns_flag & SLP_WORKTODO);
477 		lck_rw_done(&slp->ns_rwlock);
478 		if (wake && nfsd_thread_count) {
479 			while (1) {
480 				if ((slp->ns_flag & SLP_VALID) == 0) {
481 					break;
482 				}
483 				if (lck_mtx_try_lock(&nfsd_mutex)) {
484 					/* Mutex acquired */
485 					nfsrv_wakenfsd(slp);
486 					lck_mtx_unlock(&nfsd_mutex);
487 					break;
488 				}
489 				IOSleep(NFS_TRYLOCK_MSEC_SLEEP);
490 			}
491 		}
492 	}
493 }
494 
495 /*
496  * Try and extract an RPC request from the mbuf data list received on a
497  * stream socket. The "waitflag" argument indicates whether or not it
498  * can sleep.
499  */
500 int
nfsrv_getstream(struct nfsrv_sock * slp,int waitflag)501 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
502 {
503 	mbuf_t m;
504 	char *cp1, *cp2, *mdata;
505 	int error;
506 	size_t len, mlen;
507 	mbuf_t om, m2, recm;
508 	u_int32_t recmark;
509 
510 	if (slp->ns_flag & SLP_GETSTREAM) {
511 		panic("nfs getstream");
512 	}
513 	slp->ns_flag |= SLP_GETSTREAM;
514 	for (;;) {
515 		if (slp->ns_reclen == 0) {
516 			if (slp->ns_cc < NFSX_UNSIGNED) {
517 				slp->ns_flag &= ~SLP_GETSTREAM;
518 				return 0;
519 			}
520 			m = slp->ns_raw;
521 			mdata = mbuf_data(m);
522 			mlen = mbuf_len(m);
523 			if (mlen >= NFSX_UNSIGNED) {
524 				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
525 				mdata += NFSX_UNSIGNED;
526 				mlen -= NFSX_UNSIGNED;
527 				mbuf_setdata(m, mdata, mlen);
528 			} else {
529 				cp1 = (caddr_t)&recmark;
530 				cp2 = mdata;
531 				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
532 					while (mlen == 0) {
533 						m = mbuf_next(m);
534 						cp2 = mbuf_data(m);
535 						mlen = mbuf_len(m);
536 					}
537 					*cp1++ = *cp2++;
538 					mlen--;
539 					mbuf_setdata(m, cp2, mlen);
540 				}
541 			}
542 			slp->ns_cc -= NFSX_UNSIGNED;
543 			recmark = ntohl(recmark);
544 			slp->ns_reclen = recmark & ~0x80000000;
545 			if (recmark & 0x80000000) {
546 				slp->ns_flag |= SLP_LASTFRAG;
547 			} else {
548 				slp->ns_flag &= ~SLP_LASTFRAG;
549 			}
550 			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
551 				slp->ns_flag &= ~SLP_GETSTREAM;
552 				return EINVAL;
553 			}
554 			/* check if we have reached the max allowed memory consumption */
555 			if (nfsrv_unprocessed_rpc_max && (nfsrv_unprocessed_rpc_current + slp->ns_reclen > nfsrv_unprocessed_rpc_max)) {
556 				slp->ns_flag &= ~SLP_GETSTREAM;
557 				printf("nfsrv_getstream: nfsrv_unprocessed_rpc_current (%u) has reached the max allowed consumption (%u)\n", nfsrv_unprocessed_rpc_current, nfsrv_unprocessed_rpc_max);
558 				return ENOBUFS;
559 			}
560 			OSAddAtomic(slp->ns_reclen, &nfsrv_unprocessed_rpc_current);
561 			slp->ns_recslen += slp->ns_reclen;
562 		}
563 
564 		/*
565 		 * Now get the record part.
566 		 *
567 		 * Note that slp->ns_reclen may be 0.  Linux sometimes
568 		 * generates 0-length RPCs
569 		 */
570 		recm = NULL;
571 		if (slp->ns_cc == slp->ns_reclen) {
572 			recm = slp->ns_raw;
573 			slp->ns_raw = slp->ns_rawend = NULL;
574 			slp->ns_cc = slp->ns_reclen = 0;
575 		} else if (slp->ns_cc > slp->ns_reclen) {
576 			len = 0;
577 			m = slp->ns_raw;
578 			mlen = mbuf_len(m);
579 			mdata = mbuf_data(m);
580 			om = NULL;
581 			while (len < slp->ns_reclen) {
582 				if ((len + mlen) > slp->ns_reclen) {
583 					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
584 						slp->ns_flag &= ~SLP_GETSTREAM;
585 						return EWOULDBLOCK;
586 					}
587 					if (om) {
588 						if (mbuf_setnext(om, m2)) {
589 							/* trouble... just drop it */
590 							printf("nfsrv_getstream: mbuf_setnext failed\n");
591 							mbuf_freem(m2);
592 							slp->ns_flag &= ~SLP_GETSTREAM;
593 							return EWOULDBLOCK;
594 						}
595 						recm = slp->ns_raw;
596 					} else {
597 						recm = m2;
598 					}
599 					mdata += slp->ns_reclen - len;
600 					mlen -= slp->ns_reclen - len;
601 					mbuf_setdata(m, mdata, mlen);
602 					len = slp->ns_reclen;
603 				} else if ((len + mlen) == slp->ns_reclen) {
604 					om = m;
605 					len += mlen;
606 					m = mbuf_next(m);
607 					recm = slp->ns_raw;
608 					if (mbuf_setnext(om, NULL)) {
609 						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
610 						slp->ns_flag &= ~SLP_GETSTREAM;
611 						return EWOULDBLOCK;
612 					}
613 					mlen = mbuf_len(m);
614 					mdata = mbuf_data(m);
615 				} else {
616 					om = m;
617 					len += mlen;
618 					m = mbuf_next(m);
619 					mlen = mbuf_len(m);
620 					mdata = mbuf_data(m);
621 				}
622 			}
623 			slp->ns_raw = m;
624 			slp->ns_cc -= len;
625 			slp->ns_reclen = 0;
626 		} else {
627 			slp->ns_flag &= ~SLP_GETSTREAM;
628 			return 0;
629 		}
630 
631 		/*
632 		 * Accumulate the fragments into a record.
633 		 */
634 		if (slp->ns_frag == NULL) {
635 			slp->ns_frag = recm;
636 		} else {
637 			m = slp->ns_frag;
638 			while ((m2 = mbuf_next(m))) {
639 				m = m2;
640 			}
641 			if ((error = mbuf_setnext(m, recm))) {
642 				panic("nfsrv_getstream: mbuf_setnext failed 3, %d", error);
643 			}
644 		}
645 		if (slp->ns_flag & SLP_LASTFRAG) {
646 			if (slp->ns_recend) {
647 				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
648 			} else {
649 				slp->ns_rec = slp->ns_frag;
650 				slp->ns_flag |= SLP_DOREC;
651 				OSAddAtomic(-slp->ns_recslen, &nfsrv_unprocessed_rpc_current);
652 				slp->ns_recslen = 0;
653 			}
654 			slp->ns_recend = slp->ns_frag;
655 			slp->ns_frag = NULL;
656 		}
657 	}
658 }
659 
660 /*
661  * Parse an RPC header.
662  */
663 int
nfsrv_dorec(struct nfsrv_sock * slp,struct nfsd * nfsd,struct nfsrv_descript ** ndp)664 nfsrv_dorec(
665 	struct nfsrv_sock *slp,
666 	struct nfsd *nfsd,
667 	struct nfsrv_descript **ndp)
668 {
669 	mbuf_t m;
670 	mbuf_t nam;
671 	struct nfsrv_descript *nd;
672 	int error = 0;
673 
674 	*ndp = NULL;
675 	if (!(slp->ns_flag & (SLP_VALID | SLP_DOREC)) || (slp->ns_rec == NULL)) {
676 		return ENOBUFS;
677 	}
678 	nd = zalloc(nfsrv_descript_zone);
679 	m = slp->ns_rec;
680 	slp->ns_rec = mbuf_nextpkt(m);
681 	if (slp->ns_rec) {
682 		mbuf_setnextpkt(m, NULL);
683 	} else {
684 		slp->ns_flag &= ~SLP_DOREC;
685 		slp->ns_recend = NULL;
686 	}
687 	slp->ns_reccnt--;
688 	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
689 		nam = m;
690 		m = mbuf_next(m);
691 		if ((error = mbuf_setnext(nam, NULL))) {
692 			panic("nfsrv_dorec: mbuf_setnext failed %d", error);
693 		}
694 	} else {
695 		nam = NULL;
696 	}
697 	nd->nd_nam2 = nam;
698 	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
699 	if (!error) {
700 		error = nfsrv_getreq(nd);
701 	}
702 	if (error) {
703 		if (nam) {
704 			mbuf_freem(nam);
705 		}
706 		if (nd->nd_gss_context) {
707 			nfs_gss_svc_ctx_deref(nd->nd_gss_context);
708 		}
709 		NFS_ZFREE(nfsrv_descript_zone, nd);
710 		return error;
711 	}
712 	nd->nd_mrep = NULL;
713 	*ndp = nd;
714 	nfsd->nfsd_nd = nd;
715 	return 0;
716 }
717 
718 /*
719  * Parse an RPC request
720  * - verify it
721  * - fill in the cred struct.
722  */
723 int
nfsrv_getreq(struct nfsrv_descript * nd)724 nfsrv_getreq(struct nfsrv_descript *nd)
725 {
726 	struct nfsm_chain *nmreq;
727 	int len, i;
728 	u_int32_t nfsvers, auth_type;
729 	int error = 0;
730 	uid_t user_id;
731 	gid_t group_id;
732 	short ngroups;
733 	uint32_t val;
734 
735 	nd->nd_cr = NULL;
736 	nd->nd_gss_context = NULL;
737 	nd->nd_gss_seqnum = 0;
738 	nd->nd_gss_mb = NULL;
739 
740 	user_id = group_id = -2;
741 	val = auth_type = len = 0;
742 
743 	nmreq = &nd->nd_nmreq;
744 	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
745 	nfsm_chain_get_32(error, nmreq, val);           // RPC Call
746 	if (!error && (val != RPC_CALL)) {
747 		error = EBADRPC;
748 	}
749 	nfsmout_if(error);
750 	nd->nd_repstat = 0;
751 	nfsm_chain_get_32(error, nmreq, val);   // RPC Version
752 	nfsmout_if(error);
753 	if (val != RPC_VER2) {
754 		nd->nd_repstat = ERPCMISMATCH;
755 		nd->nd_procnum = NFSPROC_NOOP;
756 		return 0;
757 	}
758 	nfsm_chain_get_32(error, nmreq, val);   // RPC Program Number
759 	nfsmout_if(error);
760 	if (val != NFS_PROG) {
761 		nd->nd_repstat = EPROGUNAVAIL;
762 		nd->nd_procnum = NFSPROC_NOOP;
763 		return 0;
764 	}
765 	nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
766 	nfsmout_if(error);
767 	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
768 		nd->nd_repstat = EPROGMISMATCH;
769 		nd->nd_procnum = NFSPROC_NOOP;
770 		return 0;
771 	}
772 	nd->nd_vers = nfsvers;
773 	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
774 	nfsmout_if(error);
775 	if ((nd->nd_procnum >= NFS_NPROCS) ||
776 	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
777 		nd->nd_repstat = EPROCUNAVAIL;
778 		nd->nd_procnum = NFSPROC_NOOP;
779 		return 0;
780 	}
781 	if (nfsvers != NFS_VER3) {
782 		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
783 	}
784 	nfsm_chain_get_32(error, nmreq, auth_type);     // Auth Flavor
785 	nfsm_chain_get_32(error, nmreq, len);           // Auth Length
786 	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ)) {
787 		error = EBADRPC;
788 	}
789 	nfsmout_if(error);
790 
791 	/* Handle authentication */
792 	if (auth_type == RPCAUTH_SYS) {
793 		struct posix_cred temp_pcred;
794 		if (nd->nd_procnum == NFSPROC_NULL) {
795 			return 0;
796 		}
797 		nd->nd_sec = RPCAUTH_SYS;
798 		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);    // skip stamp
799 		nfsm_chain_get_32(error, nmreq, len);           // hostname length
800 		if (len < 0 || len > NFS_MAXNAMLEN) {
801 			error = EBADRPC;
802 		}
803 		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));  // skip hostname
804 		nfsmout_if(error);
805 
806 		/* create a temporary credential using the bits from the wire */
807 		bzero(&temp_pcred, sizeof(temp_pcred));
808 		nfsm_chain_get_32(error, nmreq, user_id);
809 		nfsm_chain_get_32(error, nmreq, group_id);
810 		temp_pcred.cr_groups[0] = group_id;
811 		nfsm_chain_get_32(error, nmreq, len);           // extra GID count
812 		if ((len < 0) || (len > RPCAUTH_UNIXGIDS)) {
813 			error = EBADRPC;
814 		}
815 		nfsmout_if(error);
816 		for (i = 1; i <= len; i++) {
817 			if (i < NGROUPS) {
818 				nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
819 			} else {
820 				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
821 			}
822 		}
823 		nfsmout_if(error);
824 		ngroups = (len >= NGROUPS) ? NGROUPS : (short)(len + 1);
825 		if (ngroups > 1) {
826 			nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
827 		}
828 		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);    // verifier flavor (should be AUTH_NONE)
829 		nfsm_chain_get_32(error, nmreq, len);           // verifier length
830 		if (len < 0 || len > RPCAUTH_MAXSIZ) {
831 			error = EBADRPC;
832 		}
833 		if (len > 0) {
834 			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
835 		}
836 
837 		/* request creation of a real credential */
838 		temp_pcred.cr_uid = user_id;
839 		temp_pcred.cr_ngroups = ngroups;
840 		nd->nd_cr = posix_cred_create(&temp_pcred);
841 		if (nd->nd_cr == NULL) {
842 			nd->nd_repstat = ENOMEM;
843 			nd->nd_procnum = NFSPROC_NOOP;
844 			return 0;
845 		}
846 	} else if (auth_type == RPCSEC_GSS) {
847 		error = nfs_gss_svc_cred_get(nd, nmreq);
848 		if (error) {
849 			if (error == EINVAL) {
850 				goto nfsmout;   // drop the request
851 			}
852 			nd->nd_repstat = error;
853 			nd->nd_procnum = NFSPROC_NOOP;
854 			return 0;
855 		}
856 	} else {
857 		if (nd->nd_procnum == NFSPROC_NULL) {   // assume it's AUTH_NONE
858 			return 0;
859 		}
860 		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
861 		nd->nd_procnum = NFSPROC_NOOP;
862 		return 0;
863 	}
864 	return 0;
865 nfsmout:
866 	if (IS_VALID_CRED(nd->nd_cr)) {
867 		kauth_cred_unref(&nd->nd_cr);
868 	}
869 	nfsm_chain_cleanup(nmreq);
870 	return error;
871 }
872 
873 /*
874  * Search for a sleeping nfsd and wake it up.
875  * SIDE EFFECT: If none found, make sure the socket is queued up so that one
876  * of the running nfsds will go look for the work in the nfsrv_sockwait list.
877  * Note: Must be called with nfsd_mutex held.
878  */
879 void
nfsrv_wakenfsd(struct nfsrv_sock * slp)880 nfsrv_wakenfsd(struct nfsrv_sock *slp)
881 {
882 	struct nfsd *nd;
883 
884 	while (1) {
885 		if ((slp->ns_flag & SLP_VALID) == 0) {
886 			return;
887 		}
888 		if (lck_rw_try_lock_exclusive(&slp->ns_rwlock)) {
889 			/* Exclusive lock acquired */
890 			break;
891 		}
892 		IOSleep(NFS_TRYLOCK_MSEC_SLEEP);
893 	}
894 
895 	/* if there's work to do on this socket, make sure it's queued up */
896 	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
897 		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
898 		slp->ns_flag |= SLP_WAITQ;
899 	}
900 	lck_rw_done(&slp->ns_rwlock);
901 
902 	/* wake up a waiting nfsd, if possible */
903 	nd = TAILQ_FIRST(&nfsd_queue);
904 	if (!nd) {
905 		return;
906 	}
907 
908 	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
909 	nd->nfsd_flag &= ~NFSD_WAITING;
910 	wakeup(nd);
911 }
912 
913 #endif /* CONFIG_NFS_SERVER */
914