xref: /xnu-8020.101.4/bsd/nfs/nfs_lock.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2002-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
30  *
31  * Redistribution and use in source and binary forms, with or without
32  * modification, are permitted provided that the following conditions
33  * are met:
34  * 1. Redistributions of source code must retain the above copyright
35  *    notice, this list of conditions and the following disclaimer.
36  * 2. Redistributions in binary form must reproduce the above copyright
37  *    notice, this list of conditions and the following disclaimer in the
38  *    documentation and/or other materials provided with the distribution.
39  * 3. Berkeley Software Design Inc's name may not be used to endorse or
40  *    promote products derived from this software without specific prior
41  *    written permission.
42  *
43  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  *
55  *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
56  */
57 
58 #include <nfs/nfs_conf.h>
59 #if CONFIG_NFS_CLIENT
60 
61 #include <sys/cdefs.h>
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/fcntl.h>
65 #include <sys/kernel.h>         /* for hz */
66 #include <sys/file_internal.h>
67 #include <sys/malloc.h>
68 #include <sys/lockf.h>          /* for hz */ /* Must come after sys/malloc.h */
69 #include <sys/kpi_mbuf.h>
70 #include <sys/mount_internal.h>
71 #include <sys/proc_internal.h>  /* for p_start */
72 #include <sys/kauth.h>
73 #include <sys/resourcevar.h>
74 #include <sys/socket.h>
75 #include <sys/unistd.h>
76 #include <sys/user.h>
77 #include <sys/vnode_internal.h>
78 
79 #include <kern/thread.h>
80 #include <kern/host.h>
81 
82 #include <machine/limits.h>
83 
84 #include <net/if.h>
85 
86 #include <nfs/rpcv2.h>
87 #include <nfs/nfsproto.h>
88 #include <nfs/nfs.h>
89 #include <nfs/nfs_gss.h>
90 #include <nfs/nfsmount.h>
91 #include <nfs/nfsnode.h>
92 #include <nfs/nfs_lock.h>
93 
94 #include <mach/host_priv.h>
95 #include <mach/mig_errors.h>
96 #include <mach/host_special_ports.h>
97 #include <lockd/lockd_mach.h>
98 
99 extern void ipc_port_release_send(ipc_port_t);
100 
101 /*
102  * pending lock request messages are kept in this queue which is
103  * kept sorted by transaction ID (xid).
104  */
105 static uint64_t nfs_lockxid = 0;
106 static LOCKD_MSG_QUEUE nfs_pendlockq = TAILQ_HEAD_INITIALIZER(nfs_pendlockq);
107 
108 /* list of mounts that are (potentially) making lockd requests */
109 TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list =
110     TAILQ_HEAD_INITIALIZER(nfs_lockd_mount_list);
111 
112 static LCK_GRP_DECLARE(nfs_lock_lck_grp, "nfs_lock");
113 static LCK_MTX_DECLARE(nfs_lock_mutex, &nfs_lock_lck_grp);
114 
115 void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
116 void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
117 int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
118 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
119 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
120 uint64_t nfs_lockxid_get(void);
121 int nfs_lockd_send_request(LOCKD_MSG *, int);
122 
123 /*
124  * Register a mount as (potentially) making lockd requests.
125  */
void
nfs_lockd_mount_register(struct nfsmount *nmp)
{
	lck_mtx_lock(&nfs_lock_mutex);
	/* Add the mount to the global lockd mount list and bump the count. */
	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nfs_lockd_mounts++;
	lck_mtx_unlock(&nfs_lock_mutex);
}
134 
135 /*
136  * Unregister a mount as (potentially) making lockd requests.
137  *
138  * When the lockd mount count drops to zero, then send a shutdown request to
139  * lockd if we've sent any requests to it.
140  */
void
nfs_lockd_mount_unregister(struct nfsmount *nmp)
{
	int send_shutdown;
	mach_port_t lockd_port = IPC_PORT_NULL;
	kern_return_t kr;

	lck_mtx_lock(&nfs_lock_mutex);
	/*
	 * NFSNOLIST in the link field means this mount was never registered
	 * (or has already been unregistered) -- nothing to do.
	 */
	if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
		lck_mtx_unlock(&nfs_lock_mutex);
		return;
	}

	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nmp->nm_ldlink.tqe_next = NFSNOLIST; /* mark as off the list */

	nfs_lockd_mounts--;

	/* send a shutdown request if there are no more lockd mounts */
	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	if (send_shutdown) {
		nfs_lockd_request_sent = 0;
	}

	/* Drop the mutex before doing any Mach IPC below. */
	lck_mtx_unlock(&nfs_lock_mutex);

	if (!send_shutdown) {
		return;
	}

	/*
	 * Let lockd know that it is no longer needed for any NFS mounts
	 */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
		printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
		    kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
		    (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
		return;
	}

	kr = lockd_shutdown(lockd_port);
	if (kr != KERN_SUCCESS) {
		printf("nfs_lockd_mount_change: shutdown %d\n", kr);
	}

	/* Release the send right obtained from host_get_lockd_port(). */
	ipc_port_release_send(lockd_port);
}
189 
190 /*
191  * insert a lock request message into the pending queue
192  * (nfs_lock_mutex must be held)
193  */
194 void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST * msgreq)195 nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
196 {
197 	LOCKD_MSG_REQUEST *mr;
198 
199 	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
200 	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
201 		/* fast path: empty queue or new largest xid */
202 		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
203 		return;
204 	}
205 	/* slow path: need to walk list to find insertion point */
206 	while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
207 		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
208 	}
209 	if (mr) {
210 		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
211 	} else {
212 		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
213 	}
214 }
215 
216 /*
217  * remove a lock request message from the pending queue
218  * (nfs_lock_mutex must be held)
219  */
void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	/* Caller must hold nfs_lock_mutex. */
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}
225 
226 /*
227  * find a pending lock request message by xid
228  *
229  * We search from the head of the list assuming that the message we're
230  * looking for is for an older request (because we have an answer to it).
231  * This assumes that lock request will be answered primarily in FIFO order.
232  * However, this may not be the case if there are blocked requests.  We may
233  * want to move blocked requests to a separate queue (but that'll complicate
234  * duplicate xid checking).
235  *
236  * (nfs_lock_mutex must be held)
237  */
238 LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)239 nfs_lockdmsg_find_by_xid(uint64_t lockxid)
240 {
241 	LOCKD_MSG_REQUEST *mr;
242 
243 	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
244 		if (mr->lmr_msg.lm_xid == lockxid) {
245 			return mr;
246 		}
247 		if (mr->lmr_msg.lm_xid > lockxid) {
248 			return NULL;
249 		}
250 	}
251 	return mr;
252 }
253 
254 /*
255  * Because we can't depend on nlm_granted messages containing the same
256  * cookie we sent with the original lock request, we need code to test
257  * if an nlm_granted answer matches the lock request.  We also need code
258  * that can find a lockd message based solely on the nlm_granted answer.
259  */
260 
261 /*
262  * compare lockd message to answer
263  *
264  * returns 0 on equality and 1 if different
265  */
266 int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST * msgreq,struct lockd_ans * ansp)267 nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
268 {
269 	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
270 		return 1;
271 	}
272 	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) {
273 		return 1;
274 	}
275 	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) {
276 		return 1;
277 	}
278 	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) {
279 		return 1;
280 	}
281 	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) {
282 		return 1;
283 	}
284 	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) {
285 		return 1;
286 	}
287 	return 0;
288 }
289 
290 /*
291  * find a pending lock request message based on the lock info provided
292  * in the lockd_ans/nlm_granted data.  We need this because we can't
293  * depend on nlm_granted messages containing the same cookie we sent
294  * with the original lock request.
295  *
296  * We search from the head of the list assuming that the message we're
297  * looking for is for an older request (because we have an answer to it).
298  * This assumes that lock request will be answered primarily in FIFO order.
299  * However, this may not be the case if there are blocked requests.  We may
300  * want to move blocked requests to a separate queue (but that'll complicate
301  * duplicate xid checking).
302  *
303  * (nfs_lock_mutex must be held)
304  */
305 LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans * ansp)306 nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
307 {
308 	LOCKD_MSG_REQUEST *mr;
309 
310 	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
311 		return NULL;
312 	}
313 	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
314 		if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) {
315 			break;
316 		}
317 	}
318 	return mr;
319 }
320 
321 /*
322  * return the next unique lock request transaction ID
323  * (nfs_lock_mutex must be held)
324  */
uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0.  So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen.  */
		if (++nfs_lockxid == 0) {
			nfs_lockxid++;
		}
		/*
		 * The pending queue is kept sorted by xid, so if the candidate
		 * exceeds the largest queued xid it cannot already be in use.
		 */
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		    (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return nfs_lockxid;
}
359 
360 #define MACH_MAX_TRIES 3
361 
362 int
nfs_lockd_send_request(LOCKD_MSG * msg,int interruptable)363 nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
364 {
365 	kern_return_t kr;
366 	int retries = 0;
367 	mach_port_t lockd_port = IPC_PORT_NULL;
368 
369 	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
370 	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) {
371 		return ENOTSUP;
372 	}
373 
374 	do {
375 		/* In the kernel all mach messaging is interruptable */
376 		do {
377 			kr = lockd_request(
378 				lockd_port,
379 				msg->lm_version,
380 				msg->lm_flags,
381 				msg->lm_xid,
382 				msg->lm_fl.l_start,
383 				msg->lm_fl.l_len,
384 				msg->lm_fl.l_pid,
385 				msg->lm_fl.l_type,
386 				msg->lm_fl.l_whence,
387 				(uint32_t *)&msg->lm_addr,
388 				(uint32_t *)&msg->lm_cred,
389 				msg->lm_fh_len,
390 				msg->lm_fh);
391 			if (kr != KERN_SUCCESS) {
392 				printf("lockd_request received %d!\n", kr);
393 			}
394 		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
395 	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);
396 
397 	ipc_port_release_send(lockd_port);
398 	switch (kr) {
399 	case MACH_SEND_INTERRUPTED:
400 		return EINTR;
401 	default:
402 		/*
403 		 * Other MACH or MIG errors we will retry. Eventually
404 		 * we will call nfs_down and allow the user to disable
405 		 * locking.
406 		 */
407 		return EAGAIN;
408 	}
409 }
410 
411 /*
412  * NFS advisory byte-level locks (client)
413  */
int
nfs3_lockd_request(
	nfsnode_t np,
	int type,
	LOCKD_MSG_REQUEST *msgreq,
	int flags,
	thread_t thd)
{
	LOCKD_MSG *msg = &msgreq->lmr_msg;
	int error, error2;
	int interruptable, slpflag;
	struct nfsmount *nmp;
	struct timeval now;
	int timeo, wentdown = 0;
	long starttime, endtime, lastmsg;
	struct timespec ts;
	struct sockaddr *saddr;

	nmp = NFSTONMP(np);
	if (!nmp || !nmp->nm_saddr) {
		return ENXIO;
	}

	/* Fill in transport details for the request under the mount lock. */
	lck_mtx_lock(&nmp->nm_lock);
	saddr = nmp->nm_saddr;
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	if (nmp->nm_vers == NFS_VER3) {
		msg->lm_flags |= LOCKD_MSG_NFSV3;
	}

	if (nmp->nm_sotype != SOCK_DGRAM) {
		msg->lm_flags |= LOCKD_MSG_TCP;
	}

	microuptime(&now);
	starttime = now.tv_sec;
	/*
	 * Prime lastmsg so the first "lockd not responding" message obeys
	 * the initial tprintf delay instead of the (longer) repeat delay.
	 */
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = NMFLAG(nmp, INTR);
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(&nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(msgreq);

	timeo = 4;

	for (;;) {
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
		lck_mtx_unlock(&nfs_lock_mutex);
		error = nfs_lockd_send_request(msg, interruptable);
		lck_mtx_lock(&nfs_lock_mutex);
		if (error && error != EAGAIN) {
			break;
		}

		/*
		 * Always wait for an answer.  Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 4 and double each timeout with a max of 30 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves.  This allows
		 * us to pick up on mount changes quicker.
		 */
wait_for_granted:
		error = EWOULDBLOCK;
		/* unlocks must not be aborted by a signal; see comment below */
		slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq->lmr_answered) {
				/* nfslockdans() will set lmr_answered and wake us */
				error = msleep(msgreq, &nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
				slpflag = 0;
			}
			if (msgreq->lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = NFSTONMP(np);
				if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP.  This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK) {
				break;
			}
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			/* ...and that there isn't a recovery pending */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (type == F_UNLCK) {
					printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
				}
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return an error that'll get this operation restarted */
				error = NFSERR_GRACE;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (type == F_UNLCK) {
						printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
					}
					break;
				}
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				if (error == EWOULDBLOCK) {
					error = ENOTSUP;
				}
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			/* ...and that there isn't a recovery pending */
			if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return to allow recovery to occur */
				error = NFSERR_DENIED;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			if ((error != EWOULDBLOCK) ||
			    ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
			    ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
				if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
					/* give up if this is for recovery and taking too long */
					error = ETIMEDOUT;
				} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
					/* recovery pending... return an error that'll get this operation restarted */
					error = NFSERR_GRACE;
				}
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq->lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					/* re-queue under a fresh xid for the cancel */
					nfs_lockdmsg_dequeue(msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(msgreq);
					msgreq->lmr_saved_errno = error;
					msgreq->lmr_errno = 0;
					msgreq->lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}

			/* warn if we're not getting any response */
			/* (note: nm_lock is still held here from above) */
			microuptime(&now);
			if ((msgreq->lmr_errno != EINPROGRESS) &&
			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				/* drop nfs_lock_mutex across nfs_down() */
				lck_mtx_unlock(&nfs_lock_mutex);
				nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 1);
				lck_mtx_lock(&nfs_lock_mutex);
				wentdown = 1;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}

			if (msgreq->lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry.  First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(msgreq);
				msgreq->lmr_saved_errno = msgreq->lmr_errno;
				msgreq->lmr_errno = 0;
				msgreq->lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}

			/*
			 * We timed out, so we will resend the request.
			 */
			if (!(flags & R_RECOVER)) {
				timeo *= 2;
			}
			if (timeo > 30) {
				timeo = 30;
			}
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
		    wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
			/*
			 * The lock request was denied because the server lockd is
			 * still in its grace period.  So, we need to try the
			 * request again in a little bit.  Return the GRACE error so
			 * the higher levels can perform the retry.
			 */
			msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
		}

		if (msgreq->lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 30;
			msgreq->lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq->lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and return a DENIED error so the
			 * higher levels can resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = NFSERR_DENIED;
			/* Will dequeue msgreq after the following break at the end of this routine */
			break;
		}

		/*
		 * If the blocked lock request was cancelled.
		 * Restore the error condition from when we
		 * originally bailed on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq->lmr_saved_errno;
		} else {
			error = msgreq->lmr_errno;
		}

		nmp = NFSTONMP(np);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP.  Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			nfs_lockdmsg_dequeue(msgreq);
			lck_mtx_unlock(&nfs_lock_mutex);
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
				nfs_lockd_mount_unregister(nmp);
			}
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
			    vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* early return: msgreq already dequeued and mutex dropped */
			return error;
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK)) {
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				}
				lck_mtx_unlock(&nmp->nm_lock);
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(msgreq);

	lck_mtx_unlock(&nfs_lock_mutex);

	return error;
}
760 
761 /*
762  * Send an NLM LOCK message to the server
763  */
764 int
nfs3_setlock_rpc(nfsnode_t np,struct nfs_open_file * nofp,struct nfs_file_lock * nflp,int reclaim,int flags,thread_t thd,kauth_cred_t cred)765 nfs3_setlock_rpc(
766 	nfsnode_t np,
767 	struct nfs_open_file *nofp,
768 	struct nfs_file_lock *nflp,
769 	int reclaim,
770 	int flags,
771 	thread_t thd,
772 	kauth_cred_t cred)
773 {
774 	struct nfs_lock_owner *nlop = nflp->nfl_owner;
775 	struct nfsmount *nmp;
776 	int error;
777 	LOCKD_MSG_REQUEST msgreq;
778 	LOCKD_MSG *msg;
779 
780 	nmp = NFSTONMP(np);
781 	if (nfs_mount_gone(nmp)) {
782 		return ENXIO;
783 	}
784 
785 	if (!nlop->nlo_open_owner) {
786 		nfs_open_owner_ref(nofp->nof_owner);
787 		nlop->nlo_open_owner = nofp->nof_owner;
788 	}
789 	if ((error = nfs_lock_owner_set_busy(nlop, thd))) {
790 		return error;
791 	}
792 
793 	/* set up lock message request structure */
794 	bzero(&msgreq, sizeof(msgreq));
795 	msg = &msgreq.lmr_msg;
796 	msg->lm_version = LOCKD_MSG_VERSION;
797 	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim) {
798 		msg->lm_flags |= LOCKD_MSG_BLOCK;
799 	}
800 	if (reclaim) {
801 		msg->lm_flags |= LOCKD_MSG_RECLAIM;
802 	}
803 	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
804 	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
805 	cru2x(cred, &msg->lm_cred);
806 
807 	msg->lm_fl.l_whence = SEEK_SET;
808 	msg->lm_fl.l_start = nflp->nfl_start;
809 	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
810 	msg->lm_fl.l_type = nflp->nfl_type;
811 	msg->lm_fl.l_pid = nlop->nlo_pid;
812 
813 	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);
814 
815 	nfs_lock_owner_clear_busy(nlop);
816 	return error;
817 }
818 
819 /*
820  * Send an NLM UNLOCK message to the server
821  */
822 int
nfs3_unlock_rpc(nfsnode_t np,struct nfs_lock_owner * nlop,__unused int type,uint64_t start,uint64_t end,int flags,thread_t thd,kauth_cred_t cred)823 nfs3_unlock_rpc(
824 	nfsnode_t np,
825 	struct nfs_lock_owner *nlop,
826 	__unused int type,
827 	uint64_t start,
828 	uint64_t end,
829 	int flags,
830 	thread_t thd,
831 	kauth_cred_t cred)
832 {
833 	struct nfsmount *nmp;
834 	LOCKD_MSG_REQUEST msgreq;
835 	LOCKD_MSG *msg;
836 
837 	nmp = NFSTONMP(np);
838 	if (!nmp) {
839 		return ENXIO;
840 	}
841 
842 	/* set up lock message request structure */
843 	bzero(&msgreq, sizeof(msgreq));
844 	msg = &msgreq.lmr_msg;
845 	msg->lm_version = LOCKD_MSG_VERSION;
846 	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
847 	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
848 	cru2x(cred, &msg->lm_cred);
849 
850 	msg->lm_fl.l_whence = SEEK_SET;
851 	msg->lm_fl.l_start = start;
852 	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
853 	msg->lm_fl.l_type = F_UNLCK;
854 	msg->lm_fl.l_pid = nlop->nlo_pid;
855 
856 	return nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd);
857 }
858 
859 /*
860  * Send an NLM LOCK TEST message to the server
861  */
862 int
nfs3_getlock_rpc(nfsnode_t np,struct nfs_lock_owner * nlop,struct flock * fl,uint64_t start,uint64_t end,vfs_context_t ctx)863 nfs3_getlock_rpc(
864 	nfsnode_t np,
865 	struct nfs_lock_owner *nlop,
866 	struct flock *fl,
867 	uint64_t start,
868 	uint64_t end,
869 	vfs_context_t ctx)
870 {
871 	struct nfsmount *nmp;
872 	int error;
873 	LOCKD_MSG_REQUEST msgreq;
874 	LOCKD_MSG *msg;
875 
876 	nmp = NFSTONMP(np);
877 	if (nfs_mount_gone(nmp)) {
878 		return ENXIO;
879 	}
880 
881 	/* set up lock message request structure */
882 	bzero(&msgreq, sizeof(msgreq));
883 	msg = &msgreq.lmr_msg;
884 	msg->lm_version = LOCKD_MSG_VERSION;
885 	msg->lm_flags |= LOCKD_MSG_TEST;
886 	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
887 	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
888 	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
889 
890 	msg->lm_fl.l_whence = SEEK_SET;
891 	msg->lm_fl.l_start = start;
892 	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
893 	msg->lm_fl.l_type = fl->l_type;
894 	msg->lm_fl.l_pid = nlop->nlo_pid;
895 
896 	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
897 
898 	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
899 		if (msg->lm_fl.l_type != F_UNLCK) {
900 			fl->l_type = msg->lm_fl.l_type;
901 			fl->l_pid = msg->lm_fl.l_pid;
902 			fl->l_start = msg->lm_fl.l_start;
903 			fl->l_len = msg->lm_fl.l_len;
904 			fl->l_whence = SEEK_SET;
905 		} else {
906 			fl->l_type = F_UNLCK;
907 		}
908 	}
909 
910 	return error;
911 }
912 
913 /*
914  * nfslockdans --
915  *      NFS advisory byte-level locks answer from the lock daemon.
916  */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION) {
		return EINVAL;
	}

	lck_mtx_lock(&nfs_lock_mutex);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) {
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		}
		/*
		 * We need to make sure this request isn't being cancelled
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) {
			msgreq = NULL;
		}
	}
	if (!msgreq) {
		/* no matching request: the answer is stale or a duplicate */
		lck_mtx_unlock(&nfs_lock_mutex);
		return EPIPE;
	}

	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		/* successful TEST: record any conflicting lock in the request */
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) {
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			} else {
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			}
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			/* no conflicting lock */
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}
	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) {
		/* server lockd is in its grace period; caller will retry */
		msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;
	}

	/* mark answered and wake the thread sleeping in nfs3_lockd_request() */
	msgreq->lmr_answered = 1;
	lck_mtx_unlock(&nfs_lock_mutex);
	wakeup(msgreq);

	return 0;
}
986 
987 /*
988  * nfslockdnotify --
989  *      NFS host restart notification from the lock daemon.
990  *
991  * Used to initiate reclaiming of held locks when a server we
992  * have mounted reboots.
993  */
int
nfslockdnotify(proc_t p, user_addr_t argp)
{
	int error, i, headsize;
	struct lockd_notify ln;
	struct nfsmount *nmp;
	struct sockaddr *saddr;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	/* copy in the fixed-size header (everything before the address array) */
	headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
	error = copyin(argp, &ln, headsize);
	if (error) {
		return error;
	}
	if (ln.ln_version != LOCKD_NOTIFY_VERSION) {
		return EINVAL;
	}
	/* sanity-bound the address count from userspace */
	if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128)) {
		return EINVAL;
	}
	argp += headsize;
	/* each address is copied into ln_addr[0] one at a time below */
	saddr = (struct sockaddr *)&ln.ln_addr[0];

	lck_mtx_lock(&nfs_lock_mutex);

	for (i = 0; i < ln.ln_addrcount; i++) {
		error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
		if (error) {
			break;
		}
		argp += sizeof(ln.ln_addr[0]);
		/* scan lockd mount list for match to this address */
		TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
			/* check if address matches this mount's server address */
			if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr)) {
				continue;
			}
			/* We have a match!  Mark it as needing recovery. */
			lck_mtx_lock(&nmp->nm_lock);
			nfs_need_recover(nmp, 0);
			lck_mtx_unlock(&nmp->nm_lock);
		}
	}

	lck_mtx_unlock(&nfs_lock_mutex);

	return error;
}
1047 
1048 #endif /* CONFIG_NFS_CLIENT */
1049