1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #define DEBUG_SUBSYSTEM S_LDLM
24 #include <linux/lustre_dlm.h>
25 #include <linux/obd_class.h>
26 #include <linux/obd.h>
/* l_wait_event interrupt callback used by ldlm_completion_ast().
 * Invoked when the waiting task catches a signal while sleeping on the
 * lock's wait queue.  Body is elided in this excerpt — presumably it
 * just logs/flags the interruption; confirm against the full file. */
28 static int interrupted_completion_wait(void *data)
/* l_wait_event timeout callback used by ldlm_completion_ast().
 * Runs when obd_timeout elapses before the lock is granted.  Walks the
 * chain lock -> l_connh -> obd -> import connection, reporting the
 * first NULL link; if the full chain is intact it logs the timeout and
 * signals a connection failure so recovery can kick in. */
33 static int expired_completion_wait(void *data)
35 struct ldlm_lock *lock = data;
36 struct ptlrpc_connection *conn;
37 struct obd_device *obd;
/* Diagnose which part of the lock's connection chain is missing. */
40 CERROR("NULL lock\n");
41 else if (!lock->l_connh)
42 CERROR("lock %p has NULL connh\n", lock);
43 else if (!(obd = class_conn2obd(lock->l_connh)))
44 CERROR("lock %p has NULL obd\n", lock);
45 else if (!(conn = obd->u.cli.cl_import.imp_connection))
46 CERROR("lock %p has NULL connection\n", lock);
/* Chain is complete: report the timeout and fail the connection so the
 * upper layers can start recovery against the remote peer. */
48 LDLM_DEBUG(lock, "timed out waiting for completion");
49 CERROR("lock %p timed out from %s\n", lock,
51 class_signal_connection_failure(conn);
/* Client-side completion AST: blocks the caller until the lock is
 * granted (l_req_mode == l_granted_mode) or destroyed/cancelled.
 *
 * flags == LDLM_FL_WAIT_NOREPROC appears to be a special "just wait,
 * don't reprocess" mode (elided branch — confirm in full source).
 * The wait uses an interruptible timeout of obd_timeout seconds with
 * expired_completion_wait / interrupted_completion_wait as callbacks. */
56 int ldlm_completion_ast(struct ldlm_lock *lock, int flags)
58 struct l_wait_info lwi =
59 LWI_TIMEOUT_INTR(obd_timeout * HZ, expired_completion_wait,
60 interrupted_completion_wait, lock);
64 if (flags == LDLM_FL_WAIT_NOREPROC)
/* Wake any existing waiters on this lock's queue. */
68 wake_up(&lock->l_waitq);
/* If none of the blocked/granted-wait flags are set, there is nothing
 * to wait for (elided fast path). */
72 if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
76 LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
79 ldlm_reprocess_all(lock->l_resource);
82 /* Go to sleep until the lock is granted or cancelled. */
83 rc = l_wait_event(lock->l_waitq,
84 ((lock->l_req_mode == lock->l_granted_mode) ||
85 lock->l_destroyed), &lwi);
/* Three wakeup outcomes: destroyed, wait failure (rc != 0), granted. */
87 if (lock->l_destroyed) {
88 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
93 LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
98 LDLM_DEBUG(lock, "client-side enqueue waking up: granted");
/* Enqueue a lock purely in the local namespace (no RPC to a server).
 * Used by ldlm_cli_enqueue() when no connection handle is supplied.
 * Creates the lock, takes a mode reference, runs the local enqueue
 * path, and fires the completion AST if one was installed.
 *
 * Refuses to operate on a client ("shadow") namespace — local enqueues
 * only make sense where this node owns the resource. */
102 static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
103 struct lustre_handle *parent_lockh,
106 void *cookie, int cookielen,
109 ldlm_completion_callback completion,
110 ldlm_blocking_callback blocking,
113 struct lustre_handle *lockh)
115 struct ldlm_lock *lock;
120 CERROR("Trying to enqueue local lock in a shadow namespace\n");
124 lock = ldlm_lock_create(ns, parent_lockh, res_id, type, mode, data,
127 GOTO(out_nolock, err = -ENOMEM);
128 LDLM_DEBUG(lock, "client-side local enqueue handler, new lock created");
/* Take the caller's mode reference and hand back a handle; no server
 * connection is involved, so l_connh stays NULL. */
130 ldlm_lock_addref_internal(lock, mode);
131 ldlm_lock2handle(lock, lockh);
132 lock->l_connh = NULL;
134 err = ldlm_lock_enqueue(lock, cookie, cookielen, flags, completion,
/* For extent locks, copy the (possibly adjusted) granted extent back
 * into the caller's cookie; if the resource changed, update res_id. */
139 if (type == LDLM_EXTENT)
140 memcpy(cookie, &lock->l_extent, sizeof(lock->l_extent));
141 if ((*flags) & LDLM_FL_LOCK_CHANGED)
142 memcpy(res_id, lock->l_resource->lr_name, sizeof(*res_id));
144 LDLM_DEBUG_NOLOCK("client-side local enqueue handler END (lock %p)",
147 if (lock->l_completion_ast)
148 lock->l_completion_ast(lock, *flags);
150 LDLM_DEBUG(lock, "client-side local enqueue END");
/* Main client-side enqueue entry point.  Sends an LDLM_ENQUEUE RPC to
 * the lock server (or falls through to ldlm_cli_enqueue_local() when
 * connh is NULL), then reconciles the local lock with the server's
 * reply: remote handle, flags, possibly-changed mode/extent/resource.
 *
 * If *flags carries LDLM_FL_REPLAY this is a replayed lock: the lock
 * already exists and only invariants are checked; otherwise a fresh
 * lock is created and referenced.  req may be passed in by the caller
 * (req_passed_in) or allocated here. */
158 int ldlm_cli_enqueue(struct lustre_handle *connh,
159 struct ptlrpc_request *req,
160 struct ldlm_namespace *ns,
161 struct lustre_handle *parent_lock_handle,
164 void *cookie, int cookielen,
167 ldlm_completion_callback completion,
168 ldlm_blocking_callback blocking,
171 struct lustre_handle *lockh)
173 struct ldlm_lock *lock;
174 struct ldlm_request *body;
175 struct ldlm_reply *reply;
176 int rc, size = sizeof(*body), req_passed_in = 1, is_replay;
179 is_replay = *flags & LDLM_FL_REPLAY;
180 LASSERT(connh != NULL || !is_replay);
/* No connection: purely local enqueue, no RPC. */
183 return ldlm_cli_enqueue_local(ns, parent_lock_handle, res_id,
184 type, cookie, cookielen, mode,
185 flags, completion, blocking, data,
188 /* If we're replaying this lock, just check some invariants.
189 * If we're creating a new lock, get everything all setup nice. */
191 lock = ldlm_handle2lock(lockh);
192 LDLM_DEBUG(lock, "client-side enqueue START");
193 LASSERT(connh == lock->l_connh);
195 lock = ldlm_lock_create(ns, parent_lock_handle, res_id, type,
196 mode, data, data_len);
198 GOTO(out_nolock, rc = -ENOMEM);
199 LDLM_DEBUG(lock, "client-side enqueue START");
200 /* for the local lock, add the reference */
201 ldlm_lock_addref_internal(lock, mode);
202 ldlm_lock2handle(lock, lockh);
203 if (type == LDLM_EXTENT)
204 memcpy(&lock->l_extent, cookie,
205 sizeof(body->lock_desc.l_extent));
/* No request supplied by the caller: allocate one ourselves. */
209 req = ptlrpc_prep_req(class_conn2cliimp(connh), LDLM_ENQUEUE, 1,
212 GOTO(out, rc = -ENOMEM);
214 } else if (req->rq_reqmsg->buflens[0] != sizeof(*body))
217 /* Dump lock data into the request buffer */
218 body = lustre_msg_buf(req->rq_reqmsg, 0);
219 ldlm_lock2desc(lock, &body->lock_desc);
220 body->lock_flags = *flags;
222 memcpy(&body->lock_handle1, lockh, sizeof(*lockh));
223 if (parent_lock_handle)
224 memcpy(&body->lock_handle2, parent_lock_handle,
225 sizeof(body->lock_handle2));
227 /* Continue as normal. */
228 if (!req_passed_in) {
229 size = sizeof(*reply);
230 req->rq_replen = lustre_msg_size(1, &size);
232 lock->l_connh = connh;
233 lock->l_export = NULL;
235 LDLM_DEBUG(lock, "sending request");
236 rc = ptlrpc_queue_wait(req);
/* RPC failed or server aborted the lock: drop our reference and
 * destroy the local lock. */
238 if (rc != ELDLM_OK) {
240 LDLM_DEBUG(lock, "client-side enqueue END (%s)",
241 rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED");
242 ldlm_lock_decref(lockh, mode);
243 /* FIXME: if we've already received a completion AST, this will
245 ldlm_lock_destroy(lock);
/* Success: absorb the server's view of the lock. */
249 reply = lustre_msg_buf(req->rq_repmsg, 0);
250 memcpy(&lock->l_remote_handle, &reply->lock_handle,
251 sizeof(lock->l_remote_handle));
252 *flags = reply->lock_flags;
254 CDEBUG(D_INFO, "local: %p, remote: %p, flags: %d\n", lock,
255 (void *)(unsigned long)reply->lock_handle.addr, *flags);
256 if (type == LDLM_EXTENT) {
257 CDEBUG(D_INFO, "requested extent: "LPU64" -> "LPU64", got "
258 "extent "LPU64" -> "LPU64"\n",
259 body->lock_desc.l_extent.start,
260 body->lock_desc.l_extent.end,
261 reply->lock_extent.start, reply->lock_extent.end);
262 cookie = &reply->lock_extent; /* FIXME bug 267 */
263 cookielen = sizeof(reply->lock_extent);
266 /* If enqueue returned a blocked lock but the completion handler has
267 * already run, then it fixed up the resource and we don't need to do it
269 if ((*flags) & LDLM_FL_LOCK_CHANGED) {
270 int newmode = reply->lock_mode;
/* Server may have granted a different (compatible) mode. */
272 if (newmode && newmode != lock->l_req_mode) {
273 LDLM_DEBUG(lock, "server returned different mode %s",
274 ldlm_lockname[newmode]);
275 lock->l_req_mode = newmode;
/* Server may have moved the lock to a different resource (e.g.
 * after a successful remote intent); follow it locally. */
278 if (reply->lock_resource_name[0] !=
279 lock->l_resource->lr_name[0]) {
280 CDEBUG(D_INFO, "remote intent success, locking %ld "
282 (long)reply->lock_resource_name[0],
283 (long)lock->l_resource->lr_name[0]);
285 ldlm_lock_change_resource(lock,
286 reply->lock_resource_name);
287 if (lock->l_resource == NULL) {
291 LDLM_DEBUG(lock, "client-side enqueue, new resource");
/* Run the local half of the enqueue and fire the completion AST. */
296 rc = ldlm_lock_enqueue(lock, cookie, cookielen, flags,
297 completion, blocking);
298 if (lock->l_completion_ast)
299 lock->l_completion_ast(lock, *flags);
303 ptlrpc_req_finished(req);
305 LDLM_DEBUG(lock, "client-side enqueue END");
/* Try to satisfy the request with an already-held compatible lock
 * (ldlm_lock_match); only if no match is found fall back to a full
 * ldlm_cli_enqueue() round trip.  Saves an RPC in the common case of
 * repeated locking of the same resource. */
313 int ldlm_match_or_enqueue(struct lustre_handle *connh,
314 struct ptlrpc_request *req,
315 struct ldlm_namespace *ns,
316 struct lustre_handle *parent_lock_handle,
319 void *cookie, int cookielen,
322 ldlm_completion_callback completion,
323 ldlm_blocking_callback blocking,
326 struct lustre_handle *lockh)
330 rc = ldlm_lock_match(ns, res_id, type, cookie, cookielen, mode, lockh);
332 rc = ldlm_cli_enqueue(connh, req, ns,
333 parent_lock_handle, res_id, type, cookie,
334 cookielen, mode, flags, completion,
335 blocking, data, data_len, lockh);
337 CERROR("ldlm_cli_enqueue: err: %d\n", rc);
/* Re-send an enqueue for an existing lock during recovery.  Sets
 * LDLM_FL_REPLAY so ldlm_cli_enqueue() takes its replay path; most
 * other arguments are NULL/ignored because the lock already exists
 * (note mode is passed as -1 — a "don't care" placeholder on replay). */
343 int ldlm_cli_replay_enqueue(struct ldlm_lock *lock)
345 struct lustre_handle lockh;
346 int flags = LDLM_FL_REPLAY;
347 ldlm_lock2handle(lock, &lockh);
348 return ldlm_cli_enqueue(lock->l_connh, NULL, NULL, NULL, NULL,
349 lock->l_resource->lr_type, NULL, 0, -1, &flags,
350 NULL, NULL, NULL, 0, &lockh);
/* Convert a lock's mode entirely locally (no RPC), then reprocess the
 * resource so any waiters that the conversion unblocked get granted.
 * Rejected on client namespaces (ns_client set) — local conversion is
 * only valid where this node owns the resource.
 * NOTE(review): error string says "cancel" but this is the convert
 * path — likely a copy/paste message; confirm against upstream. */
353 static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode,
357 if (lock->l_resource->lr_namespace->ns_client) {
358 CERROR("Trying to cancel local lock\n");
361 LDLM_DEBUG(lock, "client-side local convert");
363 ldlm_lock_convert(lock, new_mode, flags);
364 ldlm_reprocess_all(lock->l_resource);
366 LDLM_DEBUG(lock, "client-side local convert handler END");
/* FIXME: one of ldlm_cli_convert or the server side should reject attempted
372 * conversion of locks which are on the waiting or converting queue */
/* Convert an existing lock to new_mode via an LDLM_CONVERT RPC.
 * Falls back to ldlm_cli_convert_local() when the lock has no server
 * connection.  On success, applies the server-returned flags locally,
 * reprocesses the resource, and waits (via the completion AST with
 * LDLM_FL_WAIT_NOREPROC) for the converted mode to be granted. */
373 int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags)
375 struct ldlm_request *body;
376 struct lustre_handle *connh;
377 struct ldlm_reply *reply;
378 struct ldlm_lock *lock;
379 struct ldlm_resource *res;
380 struct ptlrpc_request *req;
381 int rc, size = sizeof(*body);
384 lock = ldlm_handle2lock(lockh);
390 connh = lock->l_connh;
/* No connection handle: lock is local-only, convert in place. */
393 RETURN(ldlm_cli_convert_local(lock, new_mode, flags));
395 LDLM_DEBUG(lock, "client-side convert");
397 req = ptlrpc_prep_req(class_conn2cliimp(connh), LDLM_CONVERT, 1, &size,
400 GOTO(out, rc = -ENOMEM);
/* The server identifies the lock by its remote handle. */
402 body = lustre_msg_buf(req->rq_reqmsg, 0);
403 memcpy(&body->lock_handle1, &lock->l_remote_handle,
404 sizeof(body->lock_handle1));
406 body->lock_desc.l_req_mode = new_mode;
407 body->lock_flags = *flags;
409 size = sizeof(*reply);
410 req->rq_replen = lustre_msg_size(1, &size);
412 rc = ptlrpc_queue_wait(req);
416 reply = lustre_msg_buf(req->rq_repmsg, 0);
417 res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
419 ldlm_reprocess_all(res);
420 /* Go to sleep until the lock is granted. */
421 /* FIXME: or cancelled. */
422 if (lock->l_completion_ast)
423 lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
427 ptlrpc_req_finished(req);
/* Cancel a lock held by this client.  Marks the lock CBPENDING (so no
 * new references accumulate), runs the cancel callback, sends an
 * LDLM_CANCEL RPC identifying the lock by its remote handle, and then
 * cancels the local copy.  A local-only fallback path (elided branch)
 * handles locks without a server connection.  Tolerates concurrent
 * cancels on the same handle via the LDLM_FL_CANCELING flag. */
431 int ldlm_cli_cancel(struct lustre_handle *lockh)
433 struct ptlrpc_request *req;
434 struct ldlm_lock *lock;
435 struct ldlm_request *body;
436 int rc = 0, size = sizeof(*body);
439 /* concurrent cancels on the same handle can happen */
440 lock = __ldlm_handle2lock(lockh, 0, LDLM_FL_CANCELING);
445 LDLM_DEBUG(lock, "client-side cancel");
446 /* Set this flag to prevent others from getting new references*/
447 l_lock(&lock->l_resource->lr_namespace->ns_lock);
448 lock->l_flags |= LDLM_FL_CBPENDING;
449 ldlm_cancel_callback(lock);
450 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
452 req = ptlrpc_prep_req(class_conn2cliimp(lock->l_connh),
453 LDLM_CANCEL, 1, &size, NULL);
455 GOTO(out, rc = -ENOMEM);
457 /* XXX FIXME bug 249 */
458 req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
459 req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
461 body = lustre_msg_buf(req->rq_reqmsg, 0);
462 memcpy(&body->lock_handle1, &lock->l_remote_handle,
463 sizeof(body->lock_handle1));
/* Cancel expects no reply payload. */
465 req->rq_replen = lustre_msg_size(0, NULL);
467 rc = ptlrpc_queue_wait(req);
468 ptlrpc_req_finished(req);
/* Server has dropped its copy; now cancel the local lock. */
472 ldlm_lock_cancel(lock);
/* Local-only branch (no l_connh, elided condition above): cancel in
 * place and reprocess waiters.  Rejected on client namespaces. */
474 LDLM_DEBUG(lock, "client-side local cancel");
475 if (lock->l_resource->lr_namespace->ns_client) {
476 CERROR("Trying to cancel local lock\n");
479 ldlm_lock_cancel(lock);
480 ldlm_reprocess_all(lock->l_resource);
481 LDLM_DEBUG(lock, "client-side local cancel handler END");
484 lock->l_flags |= LDLM_FL_CANCELING;
/* Shrink the namespace's unused-lock LRU back under ns_max_unused.
 * Phase 1 (under ns_lock): walk ns_unused_list, mark candidates
 * CBPENDING, take a reference, and collect them on a private work
 * list.  Phase 2 (lock dropped): cancel each collected lock via
 * ldlm_cli_cancel() — RPCs must not be sent while holding ns_lock. */
492 int ldlm_cancel_lru(struct ldlm_namespace *ns)
494 struct list_head *tmp, *next, list = LIST_HEAD_INIT(list);
496 struct ldlm_ast_work *w;
499 l_lock(&ns->ns_lock);
/* Number of locks we need to evict to get back under the limit. */
500 count = ns->ns_nr_unused - ns->ns_max_unused;
503 l_unlock(&ns->ns_lock);
507 list_for_each_safe(tmp, next, &ns->ns_unused_list) {
508 struct ldlm_lock *lock;
509 lock = list_entry(tmp, struct ldlm_lock, l_lru);
/* Unused list only holds locks with no active references. */
511 LASSERT(!lock->l_readers && !lock->l_writers);
513 /* Setting the CBPENDING flag is a little misleading, but
514 * prevents an important race; namely, once CBPENDING is set,
515 * the lock can accumulate no more readers/writers. Since
516 * readers and writers are already zero here, ldlm_lock_decref
517 * won't see this flag and call l_blocking_ast */
518 lock->l_flags |= LDLM_FL_CBPENDING;
520 OBD_ALLOC(w, sizeof(*w));
/* Hold a reference across phase 2 so the lock can't vanish. */
523 w->w_lock = LDLM_LOCK_GET(lock);
524 list_add(&w->w_list, &list);
525 ldlm_lock_remove_from_lru(lock);
530 l_unlock(&ns->ns_lock);
/* Phase 2: issue the actual cancels outside ns_lock. */
532 list_for_each_safe(tmp, next, &list) {
533 struct lustre_handle lockh;
535 w = list_entry(tmp, struct ldlm_ast_work, w_list);
537 ldlm_lock2handle(w->w_lock, &lockh);
538 rc = ldlm_cli_cancel(&lockh);
540 CDEBUG(D_INFO, "ldlm_cli_cancel: %d\n", rc);
542 list_del(&w->w_list);
543 LDLM_LOCK_PUT(w->w_lock);
544 OBD_FREE(w, sizeof(*w));
/* Cancel every unused (no readers/writers) granted lock on a single
 * resource.  Same two-phase pattern as ldlm_cancel_lru(): collect
 * candidates under ns_lock, then cancel outside it.
 *
 * flags: LDLM_FL_NO_CALLBACK suppresses the cancel callback by setting
 * LDLM_FL_CANCEL on the lock first; LDLM_FL_LOCAL_ONLY skips the RPC
 * and just drops the local copy.  A missing resource is not an error. */
550 int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
551 __u64 *res_id, int flags)
553 struct ldlm_resource *res;
554 struct list_head *tmp, *next, list = LIST_HEAD_INIT(list);
555 struct ldlm_ast_work *w;
558 res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
560 /* This is not a problem. */
561 CDEBUG(D_INFO, "No resource "LPU64"\n", res_id[0]);
565 l_lock(&ns->ns_lock);
566 list_for_each(tmp, &res->lr_granted) {
567 struct ldlm_lock *lock;
568 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
/* Skip locks that are still in active use. */
570 if (lock->l_readers || lock->l_writers)
573 /* See CBPENDING comment in ldlm_cancel_lru */
574 lock->l_flags |= LDLM_FL_CBPENDING;
576 OBD_ALLOC(w, sizeof(*w));
579 w->w_lock = LDLM_LOCK_GET(lock);
580 list_add(&w->w_list, &list);
582 l_unlock(&ns->ns_lock);
/* Phase 2: cancel collected locks without holding ns_lock. */
584 list_for_each_safe(tmp, next, &list) {
585 struct lustre_handle lockh;
587 w = list_entry(tmp, struct ldlm_ast_work, w_list);
589 /* Prevent the cancel callback from being called by setting
590 * LDLM_FL_CANCEL in the lock. Very sneaky. -p */
591 if (flags & LDLM_FL_NO_CALLBACK)
592 w->w_lock->l_flags |= LDLM_FL_CANCEL;
594 if (flags & LDLM_FL_LOCAL_ONLY) {
595 ldlm_lock_cancel(w->w_lock);
597 ldlm_lock2handle(w->w_lock, &lockh);
598 rc = ldlm_cli_cancel(&lockh);
600 CERROR("ldlm_cli_cancel: %d\n", rc);
602 list_del(&w->w_list);
603 LDLM_LOCK_PUT(w->w_lock);
604 OBD_FREE(w, sizeof(*w));
607 ldlm_resource_putref(res);
/* Cancel all locks on a namespace (or a specific resource, if given) that have
615 * If 'local_only' is true, throw the locks away without trying to notify the
/* If res_id is non-NULL only that one resource is processed; otherwise
 * every resource in the namespace hash is visited.  Failures on one
 * resource are logged but do not stop the sweep. */
617 int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, __u64 *res_id,
/* Single-resource fast path. */
624 RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, flags));
626 l_lock(&ns->ns_lock);
627 for (i = 0; i < RES_HASH_SIZE; i++) {
628 struct list_head *tmp, *pos;
629 list_for_each_safe(tmp, pos, &(ns->ns_hash[i])) {
631 struct ldlm_resource *res;
632 res = list_entry(tmp, struct ldlm_resource, lr_hash);
/* Pin the resource across the per-resource cancel. */
633 ldlm_resource_getref(res);
635 rc = ldlm_cli_cancel_unused_resource(ns, res->lr_name,
639 CERROR("cancel_unused_res ("LPU64"): %d\n",
640 res->lr_name[0], rc);
641 ldlm_resource_putref(res);
644 l_unlock(&ns->ns_lock);
649 /* Lock iterators. */
/* Apply 'iter' to every lock on a resource — granted, converting, and
 * waiting queues, in that order — under the namespace lock.  Stops
 * early and returns LDLM_ITER_STOP if the callback asks to; otherwise
 * returns LDLM_ITER_CONTINUE. */
651 int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
654 struct list_head *tmp, *next;
655 struct ldlm_lock *lock;
656 int rc = LDLM_ITER_CONTINUE;
657 struct ldlm_namespace *ns = res->lr_namespace;
662 RETURN(LDLM_ITER_CONTINUE);
664 l_lock(&ns->ns_lock);
665 list_for_each_safe(tmp, next, &res->lr_granted) {
666 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
668 if (iter(lock, closure) == LDLM_ITER_STOP)
669 GOTO(out, rc = LDLM_ITER_STOP);
672 list_for_each_safe(tmp, next, &res->lr_converting) {
673 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
675 if (iter(lock, closure) == LDLM_ITER_STOP)
676 GOTO(out, rc = LDLM_ITER_STOP);
679 list_for_each_safe(tmp, next, &res->lr_waiting) {
680 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
682 if (iter(lock, closure) == LDLM_ITER_STOP)
683 GOTO(out, rc = LDLM_ITER_STOP);
686 l_unlock(&ns->ns_lock);
/* Adapter pairing a user iterator with its closure so the pair can be
 * threaded through ldlm_resource_foreach() as a single void* argument
 * (used by ldlm_namespace_foreach()). */
690 struct iter_helper_data {
691 ldlm_iterator_t iter;
/* Unwrap the helper struct and invoke the real iterator. */
695 static int ldlm_iter_helper(struct ldlm_lock *lock, void *closure)
697 struct iter_helper_data *helper = closure;
698 return helper->iter(lock, helper->closure);
/* Apply 'iter' to every lock in the namespace: walk each hash bucket's
 * resource list and delegate to ldlm_resource_foreach() per resource
 * (via ldlm_iter_helper to carry the closure).  Each resource is
 * ref-pinned across its iteration; stops early on LDLM_ITER_STOP. */
701 int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
704 int i, rc = LDLM_ITER_CONTINUE;
705 struct iter_helper_data helper = { iter: iter, closure: closure };
707 l_lock(&ns->ns_lock);
708 for (i = 0; i < RES_HASH_SIZE; i++) {
709 struct list_head *tmp, *next;
710 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
711 struct ldlm_resource *res =
712 list_entry(tmp, struct ldlm_resource, lr_hash);
714 ldlm_resource_getref(res);
715 rc = ldlm_resource_foreach(res, ldlm_iter_helper,
717 ldlm_resource_putref(res);
718 if (rc == LDLM_ITER_STOP)
723 l_unlock(&ns->ns_lock);
/* Iterator callback for ldlm_replay_locks(): collect each lock onto
 * the caller's list so replay can happen after iteration finishes. */
729 static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
731 struct list_head *list = closure;
733 /* we use l_pending_chain here, because it's unused on clients. */
734 list_add(&lock->l_pending_chain, list);
735 return LDLM_ITER_CONTINUE;
/* Replay a single lock during recovery: re-send LDLM_ENQUEUE with
 * LDLM_FL_REPLAY (plus the lock's remembered BLOCK_* state) and update
 * l_remote_handle from the server's reply.  The elided third parameter
 * (see callers) appears to mark the last lock of the batch, which gets
 * MSG_LAST_REPLAY set on the request. */
738 static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock,
741 struct ptlrpc_request *req;
742 struct ldlm_request *body;
743 struct ldlm_reply *reply;
745 int flags = LDLM_FL_REPLAY;
/* Preserve the lock's blocked state so the server re-queues it in the
 * same position it held before the failure. */
747 flags |= lock->l_flags &
748 (LDLM_FL_BLOCK_GRANTED|LDLM_FL_BLOCK_CONV|LDLM_FL_BLOCK_WAIT);
750 size = sizeof(*body);
751 req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL);
755 body = lustre_msg_buf(req->rq_reqmsg, 0);
756 ldlm_lock2desc(lock, &body->lock_desc);
757 body->lock_flags = flags;
759 ldlm_lock2handle(lock, &body->lock_handle1);
760 size = sizeof(*reply);
761 req->rq_replen = lustre_msg_size(1, &size);
/* Last lock in the replay batch: tell the server replay is complete. */
764 req->rq_reqmsg->flags |= MSG_LAST_REPLAY;
766 LDLM_DEBUG(lock, "replaying lock:");
767 rc = ptlrpc_queue_wait(req);
/* Record the server's new handle for this lock. */
771 reply = lustre_msg_buf(req->rq_repmsg, 0);
772 memcpy(&lock->l_remote_handle, &reply->lock_handle,
773 sizeof(lock->l_remote_handle));
774 LDLM_DEBUG(lock, "replayed lock:");
776 ptlrpc_req_finished(req);
780 int ldlm_replay_locks(struct obd_import *imp)
782 struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
783 struct list_head list, *pos, *next;
784 struct ldlm_lock *lock;
788 INIT_LIST_HEAD(&list);
790 l_lock(&ns->ns_lock);
791 (void)ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
793 list_for_each_safe(pos, next, &list) {
794 lock = list_entry(pos, struct ldlm_lock, l_pending_chain);
795 rc = replay_one_lock(imp, lock, (next == &list));
797 break; /* or try to do the rest? */
799 l_unlock(&ns->ns_lock);