xref: /xnu-10002.1.13/bsd/nfs/nfs_socket.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1989, 1991, 1993, 1995
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Rick Macklem at The University of Guelph.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
65  * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
66  */
67 
68 #include <nfs/nfs_conf.h>
69 #if CONFIG_NFS_SERVER
70 
71 /*
72  * Socket operations for use by nfs
73  */
74 
75 #include <sys/systm.h>
76 #include <sys/kauth.h>
77 #include <sys/mount_internal.h>
78 #include <sys/kpi_mbuf.h>
79 #include <IOKit/IOLib.h>
80 
81 #include <netinet/in.h>
82 
83 #include <nfs/rpcv2.h>
84 #include <nfs/nfsproto.h>
85 #include <nfs/nfs.h>
86 #include <nfs/xdr_subs.h>
87 #include <nfs/nfsm_subs.h>
88 #include <nfs/nfs_gss.h>
89 
90 ZONE_DEFINE(nfsrv_descript_zone, "NFSV3 srvdesc",
91     sizeof(struct nfsrv_descript), ZC_NONE);
92 
93 int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
94 
95 uint32_t nfsrv_unprocessed_rpc_current = 0; /* Current bytes of unprocessed RPC records */
96 uint32_t nfsrv_unprocessed_rpc_max = (64 * 1024 * 1024); /* Max bytes of unprocessed RPC records - 64MB by default */
97 
98 int nfsrv_getstream(struct nfsrv_sock *, int);
99 int nfsrv_getreq(struct nfsrv_descript *);
100 extern int nfsv3_procid[NFS_NPROCS];
101 
102 #define NFS_TRYLOCK_MSEC_SLEEP 1
103 
104 /*
105  * compare two sockaddr structures
106  */
107 int
nfs_sockaddr_cmp(struct sockaddr * sa1,struct sockaddr * sa2)108 nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
109 {
110 	if (!sa1) {
111 		return -1;
112 	}
113 	if (!sa2) {
114 		return 1;
115 	}
116 	if (sa1->sa_family != sa2->sa_family) {
117 		return (sa1->sa_family < sa2->sa_family) ? -1 : 1;
118 	}
119 	if (sa1->sa_len != sa2->sa_len) {
120 		return (sa1->sa_len < sa2->sa_len) ? -1 : 1;
121 	}
122 	if (sa1->sa_family == AF_INET) {
123 		return bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
124 		           &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr));
125 	}
126 	if (sa1->sa_family == AF_INET6) {
127 		return bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
128 		           &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr));
129 	}
130 	return -1;
131 }
132 
133 /*
134  * Generate the rpc reply header
135  * siz arg. is used to decide if adding a cluster is worthwhile
136  */
137 int
nfsrv_rephead(struct nfsrv_descript * nd,__unused struct nfsrv_sock * slp,struct nfsm_chain * nmrepp,size_t siz)138 nfsrv_rephead(
139 	struct nfsrv_descript *nd,
140 	__unused struct nfsrv_sock *slp,
141 	struct nfsm_chain *nmrepp,
142 	size_t siz)
143 {
144 	mbuf_t mrep;
145 	u_int32_t *tl;
146 	struct nfsm_chain nmrep;
147 	int err, error;
148 
149 	err = nd->nd_repstat;
150 	if (err && (nd->nd_vers == NFS_VER2)) {
151 		siz = 0;
152 	}
153 
154 	/*
155 	 * If this is a big reply, use a cluster else
156 	 * try and leave leading space for the lower level headers.
157 	 */
158 	siz += RPC_REPLYSIZ;
159 	if (siz >= nfs_mbuf_minclsize) {
160 		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
161 	} else {
162 		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
163 	}
164 	if (error) {
165 		/* unable to allocate packet */
166 		/* XXX should we keep statistics for these errors? */
167 		return error;
168 	}
169 	if (siz < nfs_mbuf_minclsize) {
170 		/* leave space for lower level headers */
171 		tl = mbuf_data(mrep);
172 		tl += 80 / sizeof(*tl);  /* XXX max_hdr? XXX */
173 		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
174 	}
175 	nfsm_chain_init(&nmrep, mrep);
176 	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
177 	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
178 	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
179 		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
180 		if (err & NFSERR_AUTHERR) {
181 			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
182 			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
183 		} else {
184 			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
185 			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
186 			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
187 		}
188 	} else {
189 		/* reply status */
190 		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
191 		if (nd->nd_gss_context != NULL) {
192 			/* RPCSEC_GSS verifier */
193 			error = nfs_gss_svc_verf_put(nd, &nmrep);
194 			if (error) {
195 				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
196 				goto done;
197 			}
198 		} else {
199 			/* RPCAUTH_NULL verifier */
200 			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
201 			nfsm_chain_add_32(error, &nmrep, 0);
202 		}
203 		/* accepted status */
204 		switch (err) {
205 		case EPROGUNAVAIL:
206 			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
207 			break;
208 		case EPROGMISMATCH:
209 			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
210 			/* XXX hard coded versions? */
211 			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
212 			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
213 			break;
214 		case EPROCUNAVAIL:
215 			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
216 			break;
217 		case EBADRPC:
218 			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
219 			break;
220 		default:
221 			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
222 			if (nd->nd_gss_context != NULL) {
223 				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
224 			}
225 			if (err != NFSERR_RETVOID) {
226 				nfsm_chain_add_32(error, &nmrep,
227 				    (err ? nfsrv_errmap(nd, err) : 0));
228 			}
229 			break;
230 		}
231 	}
232 
233 done:
234 	nfsm_chain_build_done(error, &nmrep);
235 	if (error) {
236 		/* error composing reply header */
237 		/* XXX should we keep statistics for these errors? */
238 		mbuf_freem(mrep);
239 		return error;
240 	}
241 
242 	*nmrepp = nmrep;
243 	if ((err != 0) && (err != NFSERR_RETVOID)) {
244 		OSAddAtomic64(1, &nfsrvstats.srvrpc_errs);
245 	}
246 	return 0;
247 }
248 
249 /*
250  * The nfs server send routine.
251  *
252  * - return EINTR or ERESTART if interrupted by a signal
253  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
254  * - do any cleanup required by recoverable socket errors (???)
255  */
256 int
nfsrv_send(struct nfsrv_sock * slp,mbuf_t nam,mbuf_t top)257 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
258 {
259 	int error;
260 	socket_t so = slp->ns_so;
261 	struct sockaddr *sendnam;
262 	struct msghdr msg;
263 
264 	bzero(&msg, sizeof(msg));
265 	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
266 		if ((sendnam = mbuf_data(nam))) {
267 			msg.msg_name = (caddr_t)sendnam;
268 			msg.msg_namelen = sendnam->sa_len;
269 		}
270 	}
271 	if (NFSRV_IS_DBG(NFSRV_FAC_SRV, 15)) {
272 		nfs_dump_mbuf(__func__, __LINE__, "nfsrv_send\n", top);
273 	}
274 	error = sock_sendmbuf(so, &msg, top, 0, NULL);
275 	if (!error) {
276 		return 0;
277 	}
278 	log(LOG_INFO, "nfsd send error %d\n", error);
279 
280 	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM)) {
281 		error = EPIPE;  /* zap TCP sockets if they time out on send */
282 	}
283 	/* Handle any recoverable (soft) socket errors here. (???) */
284 	if (error != EINTR && error != ERESTART && error != EIO &&
285 	    error != EWOULDBLOCK && error != EPIPE) {
286 		error = 0;
287 	}
288 
289 	return error;
290 }
291 
292 /*
293  * Socket upcall routine for the nfsd sockets.
294  * The caddr_t arg is a pointer to the "struct nfsrv_sock".
295  * Essentially do as much as possible non-blocking, else punt and it will
296  * be called with MBUF_WAITOK from an nfsd.
297  */
298 void
nfsrv_rcv(socket_t so,void * arg,int waitflag)299 nfsrv_rcv(socket_t so, void *arg, int waitflag)
300 {
301 	struct nfsrv_sock *slp = arg;
302 
303 	while (1) {
304 		if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID)) {
305 			return;
306 		}
307 		if (lck_rw_try_lock_exclusive(&slp->ns_rwlock)) {
308 			/* Exclusive lock acquired */
309 			break;
310 		}
311 		IOSleep(NFS_TRYLOCK_MSEC_SLEEP);
312 	}
313 
314 	nfsrv_rcv_locked(so, slp, waitflag);
315 	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
316 }
317 void
nfsrv_rcv_locked(socket_t so,struct nfsrv_sock * slp,int waitflag)318 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
319 {
320 	mbuf_t m, mp, mhck, m2;
321 	int ns_flag = 0, error;
322 	struct msghdr   msg;
323 	size_t bytes_read;
324 
325 	if ((slp->ns_flag & SLP_VALID) == 0) {
326 		if (waitflag == MBUF_DONTWAIT) {
327 			lck_rw_done(&slp->ns_rwlock);
328 		}
329 		return;
330 	}
331 
332 #ifdef notdef
333 	/*
334 	 * Define this to test for nfsds handling this under heavy load.
335 	 */
336 	if (waitflag == MBUF_DONTWAIT) {
337 		ns_flag = SLP_NEEDQ;
338 		goto dorecs;
339 	}
340 #endif
341 	if (slp->ns_sotype == SOCK_STREAM) {
342 		/*
343 		 * If there are already records on the queue, defer soreceive()
344 		 * to an(other) nfsd so that there is feedback to the TCP layer that
345 		 * the nfs servers are heavily loaded.
346 		 */
347 		if (slp->ns_rec) {
348 			ns_flag = SLP_NEEDQ;
349 			goto dorecs;
350 		}
351 
352 		/*
353 		 * Do soreceive().
354 		 */
355 		bytes_read = 1000000000;
356 		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
357 		if (error || mp == NULL) {
358 			if (error == EWOULDBLOCK) {
359 				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
360 			} else {
361 				ns_flag = SLP_DISCONN;
362 			}
363 			goto dorecs;
364 		}
365 		m = mp;
366 		if (slp->ns_rawend) {
367 			if ((error = mbuf_setnext(slp->ns_rawend, m))) {
368 				panic("nfsrv_rcv: mbuf_setnext failed %d", error);
369 			}
370 			slp->ns_cc += bytes_read;
371 		} else {
372 			slp->ns_raw = m;
373 			slp->ns_cc = bytes_read;
374 		}
375 		while ((m2 = mbuf_next(m))) {
376 			m = m2;
377 		}
378 		slp->ns_rawend = m;
379 
380 		/*
381 		 * Now try and parse record(s) out of the raw stream data.
382 		 */
383 		error = nfsrv_getstream(slp, waitflag);
384 		if (error) {
385 			if (error == EWOULDBLOCK) {
386 				ns_flag = SLP_NEEDQ;
387 			} else {
388 				ns_flag = SLP_DISCONN;
389 			}
390 		}
391 	} else {
392 		struct sockaddr_storage nam;
393 
394 		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
395 			/* already have max # RPC records queued on this socket */
396 			ns_flag = SLP_NEEDQ;
397 			goto dorecs;
398 		}
399 
400 		bzero(&msg, sizeof(msg));
401 		msg.msg_name = (caddr_t)&nam;
402 		msg.msg_namelen = sizeof(nam);
403 
404 		do {
405 			bytes_read = 1000000000;
406 			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
407 			if (mp) {
408 				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
409 					mbuf_setlen(mhck, nam.ss_len);
410 					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
411 					m = mhck;
412 					if (mbuf_setnext(m, mp)) {
413 						/* trouble... just drop it */
414 						printf("nfsrv_rcv: mbuf_setnext failed\n");
415 						mbuf_free(mhck);
416 						m = mp;
417 					}
418 				} else {
419 					m = mp;
420 				}
421 				if (slp->ns_recend) {
422 					mbuf_setnextpkt(slp->ns_recend, m);
423 				} else {
424 					slp->ns_rec = m;
425 					slp->ns_flag |= SLP_DOREC;
426 				}
427 				slp->ns_recend = m;
428 				mbuf_setnextpkt(m, NULL);
429 				slp->ns_reccnt++;
430 			}
431 		} while (mp);
432 	}
433 
434 	/*
435 	 * Now try and process the request records, non-blocking.
436 	 */
437 dorecs:
438 	if (ns_flag) {
439 		slp->ns_flag |= ns_flag;
440 	}
441 	if (waitflag == MBUF_DONTWAIT) {
442 		int wake = (slp->ns_flag & SLP_WORKTODO);
443 		lck_rw_done(&slp->ns_rwlock);
444 		if (wake && nfsd_thread_count) {
445 			while (1) {
446 				if ((slp->ns_flag & SLP_VALID) == 0) {
447 					break;
448 				}
449 				if (lck_mtx_try_lock(&nfsd_mutex)) {
450 					/* Mutex acquired */
451 					nfsrv_wakenfsd(slp);
452 					lck_mtx_unlock(&nfsd_mutex);
453 					break;
454 				}
455 				IOSleep(NFS_TRYLOCK_MSEC_SLEEP);
456 			}
457 		}
458 	}
459 }
460 
461 /*
462  * Try and extract an RPC request from the mbuf data list received on a
463  * stream socket. The "waitflag" argument indicates whether or not it
464  * can sleep.
465  */
466 int
nfsrv_getstream(struct nfsrv_sock * slp,int waitflag)467 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
468 {
469 	mbuf_t m;
470 	char *cp1, *cp2, *mdata;
471 	int error;
472 	size_t len, mlen;
473 	mbuf_t om, m2, recm;
474 	u_int32_t recmark;
475 
476 	if (slp->ns_flag & SLP_GETSTREAM) {
477 		panic("nfs getstream");
478 	}
479 	slp->ns_flag |= SLP_GETSTREAM;
480 	for (;;) {
481 		if (slp->ns_reclen == 0) {
482 			if (slp->ns_cc < NFSX_UNSIGNED) {
483 				slp->ns_flag &= ~SLP_GETSTREAM;
484 				return 0;
485 			}
486 			m = slp->ns_raw;
487 			mdata = mbuf_data(m);
488 			mlen = mbuf_len(m);
489 			if (mlen >= NFSX_UNSIGNED) {
490 				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
491 				mdata += NFSX_UNSIGNED;
492 				mlen -= NFSX_UNSIGNED;
493 				mbuf_setdata(m, mdata, mlen);
494 			} else {
495 				cp1 = (caddr_t)&recmark;
496 				cp2 = mdata;
497 				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
498 					while (mlen == 0) {
499 						m = mbuf_next(m);
500 						cp2 = mbuf_data(m);
501 						mlen = mbuf_len(m);
502 					}
503 					*cp1++ = *cp2++;
504 					mlen--;
505 					mbuf_setdata(m, cp2, mlen);
506 				}
507 			}
508 			slp->ns_cc -= NFSX_UNSIGNED;
509 			recmark = ntohl(recmark);
510 			slp->ns_reclen = recmark & ~0x80000000;
511 			if (recmark & 0x80000000) {
512 				slp->ns_flag |= SLP_LASTFRAG;
513 			} else {
514 				slp->ns_flag &= ~SLP_LASTFRAG;
515 			}
516 			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
517 				slp->ns_flag &= ~SLP_GETSTREAM;
518 				return EINVAL;
519 			}
520 			/* check if we have reached the max allowed memory consumption */
521 			if (nfsrv_unprocessed_rpc_max && (nfsrv_unprocessed_rpc_current + slp->ns_reclen > nfsrv_unprocessed_rpc_max)) {
522 				slp->ns_flag &= ~SLP_GETSTREAM;
523 				printf("nfsrv_getstream: nfsrv_unprocessed_rpc_current (%u) has reached the max allowed consumption (%u)\n", nfsrv_unprocessed_rpc_current, nfsrv_unprocessed_rpc_max);
524 				return ENOBUFS;
525 			}
526 			OSAddAtomic(slp->ns_reclen, &nfsrv_unprocessed_rpc_current);
527 			slp->ns_recslen += slp->ns_reclen;
528 		}
529 
530 		/*
531 		 * Now get the record part.
532 		 *
533 		 * Note that slp->ns_reclen may be 0.  Linux sometimes
534 		 * generates 0-length RPCs
535 		 */
536 		recm = NULL;
537 		if (slp->ns_cc == slp->ns_reclen) {
538 			recm = slp->ns_raw;
539 			slp->ns_raw = slp->ns_rawend = NULL;
540 			slp->ns_cc = slp->ns_reclen = 0;
541 		} else if (slp->ns_cc > slp->ns_reclen) {
542 			len = 0;
543 			m = slp->ns_raw;
544 			mlen = mbuf_len(m);
545 			mdata = mbuf_data(m);
546 			om = NULL;
547 			while (len < slp->ns_reclen) {
548 				if ((len + mlen) > slp->ns_reclen) {
549 					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
550 						slp->ns_flag &= ~SLP_GETSTREAM;
551 						return EWOULDBLOCK;
552 					}
553 					if (om) {
554 						if (mbuf_setnext(om, m2)) {
555 							/* trouble... just drop it */
556 							printf("nfsrv_getstream: mbuf_setnext failed\n");
557 							mbuf_freem(m2);
558 							slp->ns_flag &= ~SLP_GETSTREAM;
559 							return EWOULDBLOCK;
560 						}
561 						recm = slp->ns_raw;
562 					} else {
563 						recm = m2;
564 					}
565 					mdata += slp->ns_reclen - len;
566 					mlen -= slp->ns_reclen - len;
567 					mbuf_setdata(m, mdata, mlen);
568 					len = slp->ns_reclen;
569 				} else if ((len + mlen) == slp->ns_reclen) {
570 					om = m;
571 					len += mlen;
572 					m = mbuf_next(m);
573 					recm = slp->ns_raw;
574 					if (mbuf_setnext(om, NULL)) {
575 						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
576 						slp->ns_flag &= ~SLP_GETSTREAM;
577 						return EWOULDBLOCK;
578 					}
579 					mlen = mbuf_len(m);
580 					mdata = mbuf_data(m);
581 				} else {
582 					om = m;
583 					len += mlen;
584 					m = mbuf_next(m);
585 					mlen = mbuf_len(m);
586 					mdata = mbuf_data(m);
587 				}
588 			}
589 			slp->ns_raw = m;
590 			slp->ns_cc -= len;
591 			slp->ns_reclen = 0;
592 		} else {
593 			slp->ns_flag &= ~SLP_GETSTREAM;
594 			return 0;
595 		}
596 
597 		/*
598 		 * Accumulate the fragments into a record.
599 		 */
600 		if (slp->ns_frag == NULL) {
601 			slp->ns_frag = recm;
602 		} else {
603 			m = slp->ns_frag;
604 			while ((m2 = mbuf_next(m))) {
605 				m = m2;
606 			}
607 			if ((error = mbuf_setnext(m, recm))) {
608 				panic("nfsrv_getstream: mbuf_setnext failed 3, %d", error);
609 			}
610 		}
611 		if (slp->ns_flag & SLP_LASTFRAG) {
612 			if (slp->ns_recend) {
613 				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
614 			} else {
615 				slp->ns_rec = slp->ns_frag;
616 				slp->ns_flag |= SLP_DOREC;
617 				OSAddAtomic(-slp->ns_recslen, &nfsrv_unprocessed_rpc_current);
618 				slp->ns_recslen = 0;
619 			}
620 			slp->ns_recend = slp->ns_frag;
621 			slp->ns_frag = NULL;
622 		}
623 	}
624 }
625 
626 /*
627  * Parse an RPC header.
628  */
629 int
nfsrv_dorec(struct nfsrv_sock * slp,struct nfsd * nfsd,struct nfsrv_descript ** ndp)630 nfsrv_dorec(
631 	struct nfsrv_sock *slp,
632 	struct nfsd *nfsd,
633 	struct nfsrv_descript **ndp)
634 {
635 	mbuf_t m;
636 	mbuf_t nam;
637 	struct nfsrv_descript *nd;
638 	int error = 0;
639 
640 	*ndp = NULL;
641 	if (!(slp->ns_flag & (SLP_VALID | SLP_DOREC)) || (slp->ns_rec == NULL)) {
642 		return ENOBUFS;
643 	}
644 	nd = zalloc(nfsrv_descript_zone);
645 	m = slp->ns_rec;
646 	slp->ns_rec = mbuf_nextpkt(m);
647 	if (slp->ns_rec) {
648 		mbuf_setnextpkt(m, NULL);
649 	} else {
650 		slp->ns_flag &= ~SLP_DOREC;
651 		slp->ns_recend = NULL;
652 	}
653 	slp->ns_reccnt--;
654 	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
655 		nam = m;
656 		m = mbuf_next(m);
657 		if ((error = mbuf_setnext(nam, NULL))) {
658 			panic("nfsrv_dorec: mbuf_setnext failed %d", error);
659 		}
660 	} else {
661 		nam = NULL;
662 	}
663 	nd->nd_nam2 = nam;
664 	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
665 	if (!error) {
666 		error = nfsrv_getreq(nd);
667 	}
668 	if (error) {
669 		if (nam) {
670 			mbuf_freem(nam);
671 		}
672 		if (nd->nd_gss_context) {
673 			nfs_gss_svc_ctx_deref(nd->nd_gss_context);
674 		}
675 		NFS_ZFREE(nfsrv_descript_zone, nd);
676 		return error;
677 	}
678 	nd->nd_mrep = NULL;
679 	*ndp = nd;
680 	nfsd->nfsd_nd = nd;
681 	return 0;
682 }
683 
684 /*
685  * Parse an RPC request
686  * - verify it
687  * - fill in the cred struct.
688  */
689 int
nfsrv_getreq(struct nfsrv_descript * nd)690 nfsrv_getreq(struct nfsrv_descript *nd)
691 {
692 	struct nfsm_chain *nmreq;
693 	int len, i;
694 	u_int32_t nfsvers, auth_type;
695 	int error = 0;
696 	uid_t user_id;
697 	gid_t group_id;
698 	short ngroups;
699 	uint32_t val;
700 
701 	nd->nd_cr = NULL;
702 	nd->nd_gss_context = NULL;
703 	nd->nd_gss_seqnum = 0;
704 	nd->nd_gss_mb = NULL;
705 
706 	user_id = group_id = -2;
707 	val = auth_type = len = 0;
708 
709 	nmreq = &nd->nd_nmreq;
710 	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
711 	nfsm_chain_get_32(error, nmreq, val);           // RPC Call
712 	if (!error && (val != RPC_CALL)) {
713 		error = EBADRPC;
714 	}
715 	nfsmout_if(error);
716 	nd->nd_repstat = 0;
717 	nfsm_chain_get_32(error, nmreq, val);   // RPC Version
718 	nfsmout_if(error);
719 	if (val != RPC_VER2) {
720 		nd->nd_repstat = ERPCMISMATCH;
721 		nd->nd_procnum = NFSPROC_NOOP;
722 		return 0;
723 	}
724 	nfsm_chain_get_32(error, nmreq, val);   // RPC Program Number
725 	nfsmout_if(error);
726 	if (val != NFS_PROG) {
727 		nd->nd_repstat = EPROGUNAVAIL;
728 		nd->nd_procnum = NFSPROC_NOOP;
729 		return 0;
730 	}
731 	nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
732 	nfsmout_if(error);
733 	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
734 		nd->nd_repstat = EPROGMISMATCH;
735 		nd->nd_procnum = NFSPROC_NOOP;
736 		return 0;
737 	}
738 	nd->nd_vers = nfsvers;
739 	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
740 	nfsmout_if(error);
741 	if ((nd->nd_procnum >= NFS_NPROCS) ||
742 	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
743 		nd->nd_repstat = EPROCUNAVAIL;
744 		nd->nd_procnum = NFSPROC_NOOP;
745 		return 0;
746 	}
747 	if (nfsvers != NFS_VER3) {
748 		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
749 	}
750 	nfsm_chain_get_32(error, nmreq, auth_type);     // Auth Flavor
751 	nfsm_chain_get_32(error, nmreq, len);           // Auth Length
752 	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ)) {
753 		error = EBADRPC;
754 	}
755 	nfsmout_if(error);
756 
757 	/* Handle authentication */
758 	if (auth_type == RPCAUTH_SYS) {
759 		struct posix_cred temp_pcred;
760 		if (nd->nd_procnum == NFSPROC_NULL) {
761 			return 0;
762 		}
763 		nd->nd_sec = RPCAUTH_SYS;
764 		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);    // skip stamp
765 		nfsm_chain_get_32(error, nmreq, len);           // hostname length
766 		if (len < 0 || len > NFS_MAXNAMLEN) {
767 			error = EBADRPC;
768 		}
769 		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));  // skip hostname
770 		nfsmout_if(error);
771 
772 		/* create a temporary credential using the bits from the wire */
773 		bzero(&temp_pcred, sizeof(temp_pcred));
774 		nfsm_chain_get_32(error, nmreq, user_id);
775 		nfsm_chain_get_32(error, nmreq, group_id);
776 		temp_pcred.cr_groups[0] = group_id;
777 		nfsm_chain_get_32(error, nmreq, len);           // extra GID count
778 		if ((len < 0) || (len > RPCAUTH_UNIXGIDS)) {
779 			error = EBADRPC;
780 		}
781 		nfsmout_if(error);
782 		for (i = 1; i <= len; i++) {
783 			if (i < NGROUPS) {
784 				nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
785 			} else {
786 				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
787 			}
788 		}
789 		nfsmout_if(error);
790 		ngroups = (len >= NGROUPS) ? NGROUPS : (short)(len + 1);
791 		if (ngroups > 1) {
792 			nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
793 		}
794 		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);    // verifier flavor (should be AUTH_NONE)
795 		nfsm_chain_get_32(error, nmreq, len);           // verifier length
796 		if (len < 0 || len > RPCAUTH_MAXSIZ) {
797 			error = EBADRPC;
798 		}
799 		if (len > 0) {
800 			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
801 		}
802 
803 		/* request creation of a real credential */
804 		temp_pcred.cr_uid = user_id;
805 		temp_pcred.cr_ngroups = ngroups;
806 		nd->nd_cr = posix_cred_create(&temp_pcred);
807 		if (nd->nd_cr == NULL) {
808 			nd->nd_repstat = ENOMEM;
809 			nd->nd_procnum = NFSPROC_NOOP;
810 			return 0;
811 		}
812 	} else if (auth_type == RPCSEC_GSS) {
813 		error = nfs_gss_svc_cred_get(nd, nmreq);
814 		if (error) {
815 			if (error == EINVAL) {
816 				goto nfsmout;   // drop the request
817 			}
818 			nd->nd_repstat = error;
819 			nd->nd_procnum = NFSPROC_NOOP;
820 			return 0;
821 		}
822 	} else {
823 		if (nd->nd_procnum == NFSPROC_NULL) {   // assume it's AUTH_NONE
824 			return 0;
825 		}
826 		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
827 		nd->nd_procnum = NFSPROC_NOOP;
828 		return 0;
829 	}
830 	return 0;
831 nfsmout:
832 	if (IS_VALID_CRED(nd->nd_cr)) {
833 		kauth_cred_unref(&nd->nd_cr);
834 	}
835 	nfsm_chain_cleanup(nmreq);
836 	return error;
837 }
838 
839 /*
840  * Search for a sleeping nfsd and wake it up.
841  * SIDE EFFECT: If none found, make sure the socket is queued up so that one
842  * of the running nfsds will go look for the work in the nfsrv_sockwait list.
843  * Note: Must be called with nfsd_mutex held.
844  */
845 void
nfsrv_wakenfsd(struct nfsrv_sock * slp)846 nfsrv_wakenfsd(struct nfsrv_sock *slp)
847 {
848 	struct nfsd *nd;
849 
850 	while (1) {
851 		if ((slp->ns_flag & SLP_VALID) == 0) {
852 			return;
853 		}
854 		if (lck_rw_try_lock_exclusive(&slp->ns_rwlock)) {
855 			/* Exclusive lock acquired */
856 			break;
857 		}
858 		IOSleep(NFS_TRYLOCK_MSEC_SLEEP);
859 	}
860 
861 	/* if there's work to do on this socket, make sure it's queued up */
862 	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
863 		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
864 		slp->ns_flag |= SLP_WAITQ;
865 	}
866 	lck_rw_done(&slp->ns_rwlock);
867 
868 	/* wake up a waiting nfsd, if possible */
869 	nd = TAILQ_FIRST(&nfsd_queue);
870 	if (!nd) {
871 		return;
872 	}
873 
874 	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
875 	nd->nfsd_flag &= ~NFSD_WAITING;
876 	wakeup(nd);
877 }
878 
879 #endif /* CONFIG_NFS_SERVER */
880