1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * by Cluster File Systems, Inc.
10 * authors, Peter Braam <braam@clusterfs.com> &
11 * Phil Schwan <phil@clusterfs.com>
14 #define DEBUG_SUBSYSTEM S_LDLM
16 #include <linux/slab.h>
17 #include <linux/module.h>
18 #include <linux/random.h>
19 #include <linux/lustre_dlm.h>
20 #include <linux/lustre_mds.h>
/* Slab cache for struct ldlm_lock objects.  ldlm_lock_new() allocates from
 * it, ldlm_lock_put() frees back into it, and ldlm_handle2lock() uses it to
 * validate a pointer recovered from a handle. */
22 kmem_cache_t *ldlm_lock_slab;
/* MDS entry points called by ldlm_intent_policy().  Both start out NULL and
 * are tested for NULL before use; the lookup goes through
 * inter_module_get_request() (the assignment itself is not visible in this
 * listing -- TODO confirm). */
23 int (*mds_reint_p)(int offset, struct ptlrpc_request *req) = NULL;
24 int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req) = NULL;
/* Forward declarations of the file-local callbacks that are referenced by
 * the compat and policy dispatch tables before their definitions appear. */
26 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b);
27 static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
28 ldlm_mode_t mode, void *data);
/* Compatibility callbacks indexed by resource type.  ldlm_lock_compat_list()
 * selects the entry with a lock's l_resource->lr_type.  Plain and MDS-intent
 * resources share ldlm_plain_compat(); extent resources use
 * ldlm_extent_compat(). */
30 ldlm_res_compat ldlm_res_compat_table [] = {
31 [LDLM_PLAIN] ldlm_plain_compat,
32 [LDLM_EXTENT] ldlm_extent_compat,
33 [LDLM_MDSINTENT] ldlm_plain_compat
/* Policy callbacks indexed by resource type.  ldlm_lock_enqueue() looks up
 * the entry for the resource's lr_type and only runs a policy when the entry
 * is non-NULL and the namespace is not a client namespace. */
36 ldlm_res_policy ldlm_res_policy_table [] = {
38 [LDLM_EXTENT] ldlm_extent_policy,
39 [LDLM_MDSINTENT] ldlm_intent_policy
/* Fill in @lockh so that it identifies @lock.
 *
 * The handle's addr field receives the lock pointer (cast through unsigned
 * long to a 64-bit value) and its cookie field receives lock->l_random.
 * ldlm_handle2lock() compares the same two fields when converting back. */
42 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
44 lockh->addr = (__u64)(unsigned long)lock;
45 lockh->cookie = lock->l_random;
/* Convert @handle back into a lock pointer.
 *
 * The address stored in the handle is first checked against ldlm_lock_slab.
 * Then, with the namespace lock held, the handle cookie is compared with the
 * lock's l_random and locks flagged LDLM_FL_DESTROYED are rejected.  The
 * namespace lock is dropped again on the visible exit path. */
48 struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *handle)
50 struct ldlm_lock *lock = NULL;
/* Recover the pointer that ldlm_lock2handle() stored in the handle. */
55 lock = (struct ldlm_lock *)(unsigned long)(handle->addr);
57 if (!kmem_cache_validate(ldlm_lock_slab, (void *)lock))
60 l_lock(&lock->l_resource->lr_namespace->ns_lock);
/* NOTE(review): both GOTO(out, ...) sites below assign NULL to 'handle', not
 * to 'lock'.  If the exit path returns 'lock' (the return statement is not
 * visible here), a stale-cookie or destroyed lock would still be handed back
 * to the caller -- confirm whether 'lock = NULL' was intended. */
61 if (lock->l_random != handle->cookie)
62 GOTO(out, handle = NULL);
64 if (lock->l_flags & LDLM_FL_DESTROYED)
65 GOTO(out, handle = NULL);
70 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Take an additional reference on @lock.
 *
 * The visible code holds the namespace lock while it takes a reference on
 * the lock's resource.  The update of the lock's own reference count and the
 * return statement are not visible in this listing; presumably @lock itself
 * is returned -- TODO confirm. */
74 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
76 l_lock(&lock->l_resource->lr_namespace->ns_lock);
78 ldlm_resource_getref(lock->l_resource);
79 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Drop a reference on @lock.
 *
 * The visible code puts the lock's resource, puts the parent lock, and --
 * once l_refc is zero and the lock carries LDLM_FL_DESTROYED -- releases the
 * connection reference (if any) and returns the lock to ldlm_lock_slab.
 * NOTE(review): the l_refc decrement and any guards around the resource and
 * parent puts are not visible here; verify against the full source. */
83 void ldlm_lock_put(struct ldlm_lock *lock)
/* Cache the namespace lock pointer up front. */
85 struct lustre_lock *nslock = &lock->l_resource->lr_namespace->ns_lock;
93 ldlm_resource_put(lock->l_resource);
/* Recursive call on the parent lock. */
95 ldlm_lock_put(lock->l_parent);
/* Only a destroyed lock with no remaining references is actually freed. */
97 if (lock->l_refc == 0 && (lock->l_flags & LDLM_FL_DESTROYED)) {
98 if (lock->l_connection)
99 ptlrpc_put_connection(lock->l_connection);
100 kmem_cache_free(ldlm_lock_slab, lock);
/* Mark @lock as destroyed.
 *
 * Under the namespace lock this reports a lock that still has children (and
 * dumps it), logs a lock that still has reader/writer references, tests
 * whether the lock is still linked on a resource list, and finally sets
 * l_flags to LDLM_FL_DESTROYED.  The memory is not freed here; the visible
 * kmem_cache_free() call is in ldlm_lock_put(). */
107 void ldlm_lock_destroy(struct ldlm_lock *lock)
110 l_lock(&lock->l_resource->lr_namespace->ns_lock);
112 if (!list_empty(&lock->l_children)) {
113 CERROR("lock %p still has children (%p)!\n", lock,
114 lock->l_children.next);
115 ldlm_lock_dump(lock);
118 if (lock->l_readers || lock->l_writers) {
119 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
120 "writers)\n", lock->l_readers, lock->l_writers);
124 if (!list_empty(&lock->l_res_link))
/* NOTE(review): the flags word is assigned, not OR-ed, so every other flag
 * bit is cleared at this point -- confirm that this is intentional. */
127 lock->l_flags = LDLM_FL_DESTROYED;
128 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
134 usage: pass in a resource on which you have done get
135 pass in a parent lock on which you have done a get
136 do not put the resource or the parent
137 returns: lock with refcount 1
/* Allocate and initialise a new lock on @resource, optionally as a child of
 * @parent.
 *
 * The lock is allocated from ldlm_lock_slab and zero-filled; its list heads
 * and wait queue are initialised and l_resource is set to @resource.  When
 * @parent is non-NULL the new lock records it in l_parent and is linked onto
 * the parent's l_children list under the parent's namespace lock.
 * The handling of a NULL @resource or a failed allocation is not visible in
 * this listing. */
139 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
140 struct ldlm_resource *resource)
142 struct ldlm_lock *lock;
145 if (resource == NULL)
148 lock = kmem_cache_alloc(ldlm_lock_slab, SLAB_KERNEL);
152 memset(lock, 0, sizeof(*lock));
/* NOTE(review): the random value is written into l_cookie, but
 * ldlm_lock2handle() and ldlm_handle2lock() use l_random as the handle
 * cookie, and ldlm_lock_enqueue() later overwrites l_cookie with the
 * caller's cookie pointer.  Looks like l_random was intended here (as
 * written it stays zero from the memset above) -- confirm. */
153 get_random_bytes(&lock->l_cookie, sizeof(__u64));
155 lock->l_resource = resource;
157 INIT_LIST_HEAD(&lock->l_children);
158 INIT_LIST_HEAD(&lock->l_res_link);
159 init_waitqueue_head(&lock->l_waitq);
161 if (parent != NULL) {
162 l_lock(&parent->l_resource->lr_namespace->ns_lock);
163 lock->l_parent = parent;
164 list_add(&lock->l_childof, &parent->l_children);
165 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
/* Re-point @lock at the resource named by @new_resid.
 *
 * The new resource is obtained from the lock's current namespace with the
 * current resource's type, all under the namespace lock.  A loop then runs
 * once per lock reference (l_refc) to carry the resource references over. */
170 int ldlm_lock_change_resource(struct ldlm_lock *lock, __u64 new_resid[3])
172 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
176 l_lock(&ns->ns_lock);
177 type = lock->l_resource->lr_type;
/* From here on lock->l_resource refers to the new resource. */
179 lock->l_resource = ldlm_resource_get(ns, NULL, new_resid, type, 1);
180 if (lock->l_resource == NULL) {
185 /* move references over */
/* NOTE(review): in the visible lines both the getref and the put operate on
 * lock->l_resource, which is already the new resource; no pointer to the old
 * resource is visibly saved.  Moving references would seem to require
 * putting the old resource instead -- confirm against the full source. */
186 for (i = 0; i < lock->l_refc; i++) {
188 ldlm_resource_getref(lock->l_resource);
189 rc = ldlm_resource_put(lock->l_resource);
190 if (rc == 1 && i != lock->l_refc - 1)
194 l_unlock(&ns->ns_lock);
/* Policy callback for LDLM_MDSINTENT resources (see ldlm_res_policy_table).
 *
 * @lock        lock being enqueued
 * @req_cookie  the incoming struct ptlrpc_request
 * @mode, @data not referenced in the visible code
 *
 * When the request carries more than one buffer, buffer 1 is read as a
 * struct ldlm_intent.  A reply is packed, the intent is dispatched to the
 * MDS through mds_reint_p / mds_getattr_name_p, and the lock is moved to the
 * resource named by the inode number in the MDS reply, returning
 * ELDLM_LOCK_CHANGED.  Unlink and rmdir intents return ELDLM_LOCK_ABORTED
 * instead.  Without an intent buffer a reply holding only a struct
 * ldlm_reply is packed.
 * NOTE(review): the switch statements, several case labels and the error
 * paths are not visible in this listing; the summary above covers only the
 * statements shown. */
198 static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
199 ldlm_mode_t mode, void *data)
201 struct ptlrpc_request *req = req_cookie;
208 if (req->rq_reqmsg->bufcount > 1) {
209 /* an intent needs to be considered */
210 struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
211 struct mds_body *mds_rep;
212 struct ldlm_reply *rep;
213 __u64 new_resid[3] = {0, 0, 0}, old_res;
214 int bufcount = -1, rc, size[3] = {sizeof(struct ldlm_reply),
215 sizeof(struct mds_body),
216 sizeof(struct obdo)};
/* The opcode is converted in place, i.e. the request buffer itself is
 * modified (presumably network-to-host byte order, judging by the macro
 * name). */
218 it->opc = NTOH__u64(it->opc);
220 LDLM_DEBUG(lock, "intent policy, opc: %Ld", it->opc);
225 /* Note that in the negative case you may be returning
226 * a file and its obdo */
228 case IT_CREAT|IT_OPEN:
239 size[1] = sizeof(struct obdo);
248 rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
251 rc = req->rq_status = -ENOMEM;
/* Reply buffer 0 is the ldlm_reply. */
255 rep = lustre_msg_buf(req->rq_repmsg, 0);
256 rep->lock_policy_res1 = 1;
261 case IT_CREAT|IT_OPEN:
/* The MDS entry point is looked up only while the cached pointer is NULL. */
270 if (mds_reint_p == NULL)
272 inter_module_get_request
273 ("mds_reint", "mds");
274 if (IS_ERR(mds_reint_p)) {
275 CERROR("MDSINTENT locks require the MDS "
280 rc = mds_reint_p(2, req);
/* Same lookup-on-first-use pattern for the getattr-by-name entry point. */
288 if (mds_getattr_name_p == NULL)
290 inter_module_get_request
291 ("mds_getattr_name", "mds");
292 if (IS_ERR(mds_getattr_name_p)) {
293 CERROR("MDSINTENT locks require the MDS "
298 rc = mds_getattr_name_p(2, req);
304 case IT_READDIR|IT_OPEN:
308 CERROR("Unhandled intent\n");
/* Unlink and rmdir intents do not keep a lock: the enqueue is aborted. */
312 if (it->opc == IT_UNLINK || it->opc == IT_RMDIR)
313 RETURN(ELDLM_LOCK_ABORTED);
/* Reply buffer 1 is read as the MDS body; its inode number names the
 * resource the lock is switched to. */
315 mds_rep = lustre_msg_buf(req->rq_repmsg, 1);
316 rep->lock_policy_res2 = req->rq_status;
317 new_resid[0] = mds_rep->ino;
318 old_res = lock->l_resource->lr_name[0];
/* NOTE(review): the two adjacent string literals below concatenate without a
 * separating space, so the message reads "...instead of<number>". */
320 CDEBUG(D_INFO, "remote intent: locking %d instead of"
321 "%ld\n", mds_rep->ino, (long)old_res);
/* The return value is not captured; failure is detected through
 * lock->l_resource being NULL afterwards. */
323 ldlm_lock_change_resource(lock, new_resid);
324 if (lock->l_resource == NULL) {
328 LDLM_DEBUG(lock, "intent policy, old res %ld",
330 RETURN(ELDLM_LOCK_CHANGED);
/* Presumably the no-intent branch: the reply holds just the ldlm_reply. */
332 int size = sizeof(struct ldlm_reply);
333 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
336 CERROR("out of memory\n");
/* Compat callback for plain and MDS-intent resources (see
 * ldlm_res_compat_table): returns the result of lockmode_compat() applied to
 * the requested modes of @a and @b. */
344 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b)
346 return lockmode_compat(a->l_req_mode, b->l_req_mode);
/* Fill in the lock descriptor @desc from @lock.
 *
 * The resource part is produced by ldlm_res2desc(); the requested and
 * granted modes are copied directly, and the extent and version fields are
 * copied with memcpy() sized by the destination fields. */
349 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
351 ldlm_res2desc(lock->l_resource, &desc->l_resource);
352 desc->l_req_mode = lock->l_req_mode;
353 desc->l_granted_mode = lock->l_granted_mode;
354 memcpy(&desc->l_extent, &lock->l_extent, sizeof(desc->l_extent));
355 memcpy(desc->l_version, lock->l_version, sizeof(desc->l_version));
/* Prepare a blocking AST for @lock on behalf of the conflicting lock @new.
 *
 * Under the namespace lock: if LDLM_FL_AST_SENT is already set the lock is
 * released again straight away (an early return is presumably on a line not
 * visible here).  Otherwise the flag is set, a descriptor of @new is built,
 * and the lock's blocking callback is invoked with the address of a request
 * pointer.  The request is then linked onto the resource's lr_tmp list to be
 * sent later. */
358 static int ldlm_send_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock *new)
360 struct ldlm_lock_desc desc;
361 struct ptlrpc_request *req = NULL;
364 l_lock(&lock->l_resource->lr_namespace->ns_lock);
/* The flag ensures the AST is prepared at most once per lock. */
365 if (lock->l_flags & LDLM_FL_AST_SENT) {
366 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
370 lock->l_flags |= LDLM_FL_AST_SENT;
371 /* FIXME: this should merely add the lock to the lr_tmp list */
372 ldlm_lock2desc(new, &desc);
373 lock->l_blocking_ast(lock, &desc, lock->l_data, lock->l_data_len, &req);
374 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* lr_tmp is the list head installed by ldlm_reprocess_all().  Any guard on
 * 'req' being non-NULL is not visible here -- TODO confirm. */
377 struct list_head *list = lock->l_resource->lr_tmp;
378 list_add(&req->rq_multi, list);
383 /* Args: unlocked lock */
/* Record a use of @lock in the given @mode, under the namespace lock.
 *
 * The modes LCK_NL, LCK_CR and LCK_PR are tested as one group.  The counter
 * updates themselves are not visible in this listing; presumably that group
 * counts towards l_readers and every other mode towards l_writers -- TODO
 * confirm. */
384 void ldlm_lock_addref(struct ldlm_lock *lock, __u32 mode)
386 l_lock(&lock->l_resource->lr_namespace->ns_lock);
387 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
391 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
395 /* Args: unlocked lock */
/* Release a use of @lock in the given @mode.
 *
 * The mode test mirrors ldlm_lock_addref().  If no readers or writers remain
 * afterwards and LDLM_FL_CBPENDING is set, the lock's blocking callback is
 * invoked with NULL descriptor and request arguments.  A pending callback on
 * a lock whose namespace is not a client namespace is reported as an error. */
396 void ldlm_lock_decref(struct ldlm_lock *lock, __u32 mode)
403 l_lock(&lock->l_resource->lr_namespace->ns_lock);
404 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
409 /* If we received a blocked AST and this was the last reference,
410 * run the callback. */
411 if (!lock->l_readers && !lock->l_writers &&
412 (lock->l_flags & LDLM_FL_CBPENDING)) {
413 if (!lock->l_resource->lr_namespace->ns_client) {
414 CERROR("LDLM_FL_CBPENDING set on non-local lock!\n");
418 CDEBUG(D_INFO, "final decref done on cbpending lock, "
419 "calling callback.\n");
/* The namespace lock is dropped before the callback is invoked. */
420 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
422 lock->l_blocking_ast(lock, NULL, lock->l_data,
423 lock->l_data_len, NULL);
/* Unlock for the path that did not run the callback. */
425 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Check @lock against every lock linked on @queue.
 *
 * A queue entry passes if the compat callback for its resource type accepts
 * the pair, or if lockmode_compat() accepts the entry's granted mode against
 * @lock's requested mode.  For an entry that fails both tests, a blocking
 * AST is prepared via ldlm_send_blocking_ast() when @send_cbs is non-zero
 * and the entry has a blocking callback.
 * The return statement is not visible here; callers treat a zero result as
 * "not compatible" (see ldlm_lock_enqueue() and ldlm_reprocess_queue()). */
432 static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs,
433 struct list_head *queue)
435 struct list_head *tmp, *pos;
438 list_for_each_safe(tmp, pos, queue) {
439 struct ldlm_lock *child;
440 ldlm_res_compat compat;
442 child = list_entry(tmp, struct ldlm_lock, l_res_link);
/* First test: the type-specific callback, if the table has one. */
446 compat = ldlm_res_compat_table[child->l_resource->lr_type];
447 if (compat && compat(child, lock)) {
448 CDEBUG(D_OTHER, "compat function succeded, next.\n");
/* Second test: generic mode compatibility, granted versus requested. */
451 if (lockmode_compat(child->l_granted_mode, lock->l_req_mode)) {
452 CDEBUG(D_OTHER, "lock modes are compatible, next.\n");
458 CDEBUG(D_OTHER, "compat function failed and lock modes "
460 if (send_cbs && child->l_blocking_ast != NULL) {
461 CDEBUG(D_OTHER, "incompatible; sending blocking "
463 /* It's very difficult to actually send the AST from
464 * here, because we'd have to drop the lock before going
465 * to sleep to wait for the reply. Instead we build the
466 * packet and send it later. */
467 ldlm_send_blocking_ast(child, lock);
/* Check @lock against its resource's granted list and, in a second call,
 * its converting list, all under the namespace lock.  @send_cbs is passed
 * through to ldlm_lock_compat_list().
 * NOTE(review): the condition guarding the second call and the return
 * statement are not visible in this listing. */
474 static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs)
479 l_lock(&lock->l_resource->lr_namespace->ns_lock);
480 rc = ldlm_lock_compat_list(lock, send_cbs, &lock->l_resource->lr_granted);
481 /* FIXME: should we be sending ASTs to converting? */
483 rc = ldlm_lock_compat_list
484 (lock, send_cbs, &lock->l_resource->lr_converting);
486 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
491 - ldlm_handle_enqueue - resource
/* Grant @lock: link it onto its resource's granted list and make the
 * granted mode equal to the requested mode, under the namespace lock.
 *
 * The resource's lr_most_restr is lowered when the newly granted mode
 * compares less than it.  If the lock has a completion callback it is
 * invoked with the address of a request pointer, and the request is then
 * linked onto the resource's lr_tmp list for later sending. */
493 void ldlm_grant_lock(struct ldlm_lock *lock)
495 struct ldlm_resource *res = lock->l_resource;
496 struct ptlrpc_request *req = NULL;
499 l_lock(&lock->l_resource->lr_namespace->ns_lock);
500 ldlm_resource_add_lock(res, &res->lr_granted, lock);
501 lock->l_granted_mode = lock->l_req_mode;
503 if (lock->l_granted_mode < res->lr_most_restr)
504 res->lr_most_restr = lock->l_granted_mode;
506 if (lock->l_completion_ast) {
507 /* FIXME: this should merely add lock to lr_tmp list */
508 lock->l_completion_ast(lock, NULL, lock->l_data,
509 lock->l_data_len, &req);
/* Any guard on 'req' or 'list' being non-NULL is on lines not visible
 * here -- TODO confirm. */
511 struct list_head *list = res->lr_tmp;
516 list_add(&req->rq_multi, list);
519 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Scan @queue for a lock that can satisfy a request for @mode (and, on
 * extent resources, @extent).
 *
 * Each entry is tested for LDLM_FL_CBPENDING, for a requested mode different
 * from @mode, and -- on LDLM_EXTENT resources -- for an extent that does not
 * fully cover [@extent->start, @extent->end].  A lock that gets past these
 * tests is addref()ed in @mode.
 * The statements taken when a test fires and the return statements are not
 * visible here; presumably failing entries are skipped and the first match
 * is returned, NULL otherwise -- TODO confirm. */
523 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
524 struct ldlm_extent *extent)
526 struct ldlm_lock *lock;
527 struct list_head *tmp;
529 list_for_each(tmp, queue) {
530 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
532 if (lock->l_flags & LDLM_FL_CBPENDING)
535 /* lock_convert() takes the resource lock, so we're sure that
536 * req_mode, lr_type, and l_cookie won't change beneath us */
537 if (lock->l_req_mode != mode)
/* @extent is dereferenced only for extent resources. */
540 if (lock->l_resource->lr_type == LDLM_EXTENT &&
541 (lock->l_extent.start > extent->start ||
542 lock->l_extent.end < extent->end))
545 ldlm_lock_addref(lock, mode);
552 /* Must be called with no resource or lock locks held.
554 * Returns 1 if it finds an already-existing lock that is compatible; in this
555 * case, lockh is filled in with an addref()ed lock */
/* Look for an existing lock on resource @res_id of @type in @ns that matches
 * @mode.
 *
 * The resource is looked up with a final argument of 0 (ldlm_lock_create()
 * passes 1; presumably 0 means "do not create" -- TODO confirm).  The
 * granted, converting and waiting queues are then searched in that order
 * under the namespace lock.  @cookie is handed to search_queue() as its
 * extent argument, so for extent resources it must point to a struct
 * ldlm_extent.  After the resource reference and the namespace lock are
 * released, the caller sleeps until the lock's requested and granted modes
 * are equal. */
556 int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
557 void *cookie, int cookielen, ldlm_mode_t mode,
558 struct lustre_handle *lockh)
560 struct ldlm_resource *res;
561 struct ldlm_lock *lock;
565 res = ldlm_resource_get(ns, NULL, res_id, type, 0);
569 ns = res->lr_namespace;
570 l_lock(&ns->ns_lock);
572 if ((lock = search_queue(&res->lr_granted, mode, cookie)))
574 if ((lock = search_queue(&res->lr_converting, mode, cookie)))
576 if ((lock = search_queue(&res->lr_waiting, mode, cookie)))
581 ldlm_resource_put(res);
582 l_unlock(&ns->ns_lock);
/* NOTE(review): the result of wait_event_interruptible() is not captured in
 * this statement, so an interrupted wait is indistinguishable from a granted
 * lock at this point -- confirm whether that is acceptable. */
585 wait_event_interruptible(lock->l_waitq, lock->l_req_mode ==
586 lock->l_granted_mode);
591 /* Must be called without the resource lock held. Returns a referenced,
592 * unlocked ldlm_lock. */
/* Create a new lock on resource @res_id of @type in @ns.
 *
 * The optional parent lock is resolved from @parent_lock_handle and its
 * resource is passed as the parent when getting (with create flag 1) the
 * target resource.  The lock is allocated by ldlm_lock_new(); the visible
 * code then stores the requested mode and data length and takes a mode
 * reference with ldlm_lock_addref().
 * NOTE(review): part of the parameter list (the 'mode' and 'data_len' names
 * used below) and the error paths are not visible in this listing.  The
 * parent handle goes through lustre_handle2object() rather than
 * ldlm_handle2lock(); whether that performs cookie validation cannot be told
 * from here. */
593 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
594 struct lustre_handle *parent_lock_handle,
595 __u64 *res_id, __u32 type,
600 struct ldlm_resource *res, *parent_res = NULL;
601 struct ldlm_lock *lock, *parent_lock;
603 parent_lock = lustre_handle2object(parent_lock_handle);
605 parent_res = parent_lock->l_resource;
607 res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
611 lock = ldlm_lock_new(parent_lock, res);
613 ldlm_resource_put(res);
617 lock->l_req_mode = mode;
619 lock->l_data_len = data_len;
620 ldlm_lock_addref(lock, mode);
625 /* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */
/* Enqueue @lock on its resource.
 *
 * @cookie / @cookie_len  caller data; for extent resources @cookie is copied
 *                        into the lock's extent, and the pointer and length
 *                        are stored on the lock
 * @completion, @blocking callbacks stored on the lock
 * (a 'flags' in/out pointer is used below; its declaration is on a line not
 * visible in this listing)
 *
 * When the namespace is not a client namespace and the resource type has a
 * policy callback, the policy runs first and may change the lock's resource
 * (ELDLM_LOCK_CHANGED) or abort the enqueue (ELDLM_LOCK_ABORTED, in which
 * case the lock is destroyed).  In a client namespace the lock is placed on
 * the converting or waiting list, or granted, according to the LDLM_FL_BLOCK
 * bits already present in *flags.  Otherwise the lock is queued behind
 * existing converting or waiting locks, or behind incompatible granted
 * locks, with the matching LDLM_FL_BLOCK_* bit set in *flags; if none of
 * these applies it is granted. */
626 ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock *lock,
627 void *cookie, int cookie_len,
629 ldlm_lock_callback completion,
630 ldlm_lock_callback blocking)
632 struct ldlm_resource *res;
634 ldlm_res_policy policy;
637 res = lock->l_resource;
638 local = res->lr_namespace->ns_client;
640 lock->l_blocking_ast = blocking;
642 if (res->lr_type == LDLM_EXTENT)
643 memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent));
645 /* policies are not executed on the client */
646 if (!local && (policy = ldlm_res_policy_table[res->lr_type])) {
649 /* We do this dancing with refcounts and locks because the
650 * policy function could send an RPC */
651 ldlm_resource_getref(res);
653 rc = policy(lock, cookie, lock->l_req_mode, NULL);
655 if (ldlm_resource_put(res) && rc != ELDLM_LOCK_CHANGED)
656 /* ldlm_resource_put() should not destroy 'res' unless
657 * 'res' is no longer the resource for this lock. */
660 if (rc == ELDLM_LOCK_CHANGED) {
/* The policy re-pointed the lock; continue with the new resource. */
661 res = lock->l_resource;
662 *flags |= LDLM_FL_LOCK_CHANGED;
663 } else if (rc == ELDLM_LOCK_ABORTED) {
664 ldlm_lock_destroy(lock);
/* The cookie pointer itself is stored, not a copy of what it points to. */
669 lock->l_cookie = cookie;
670 lock->l_cookie_len = cookie_len;
672 if (local && lock->l_req_mode == lock->l_granted_mode) {
673 /* The server returned a blocked lock, but it was granted before
674 * we got a chance to actually enqueue it. We don't need to do
679 /* If this is a local resource, put it on the appropriate list. */
680 list_del_init(&lock->l_res_link);
682 if (*flags & LDLM_FL_BLOCK_CONV)
683 ldlm_resource_add_lock(res, res->lr_converting.prev,
685 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
686 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
688 ldlm_grant_lock(lock);
692 /* FIXME: We may want to optimize by checking lr_most_restr */
/* In all three blocked cases below the lock is appended to lr_waiting, also
 * when the reason is a non-empty converting list; only the flag reported to
 * the caller differs. */
693 if (!list_empty(&res->lr_converting)) {
694 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
695 *flags |= LDLM_FL_BLOCK_CONV;
698 if (!list_empty(&res->lr_waiting)) {
699 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
700 *flags |= LDLM_FL_BLOCK_WAIT;
/* send_cbs is 0 here, so no blocking ASTs are prepared by this check. */
703 if (!ldlm_lock_compat(lock,0)) {
704 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
705 *flags |= LDLM_FL_BLOCK_GRANTED;
709 ldlm_grant_lock(lock);
712 /* Don't set 'completion_ast' until here so that if the lock is granted
713 * immediately we don't do an unnecessary completion call. */
714 lock->l_completion_ast = completion;
718 /* Must be called with the namespace lock taken; queue is the waiting or converting list. */
/* Walk @queue of resource @res and grant the pending locks that have become
 * compatible.
 *
 * Each pending lock is checked with ldlm_lock_compat(pending, 1), which also
 * prepares blocking ASTs for conflicting locks.  A lock that passes is
 * unlinked from the queue and granted.  What happens when the check fails
 * (presumably the walk stops) and the return value are not visible in this
 * listing. */
719 static int ldlm_reprocess_queue(struct ldlm_resource *res,
720 struct list_head *queue)
722 struct list_head *tmp, *pos;
/* The _safe iterator is needed because granted entries are unlinked from
 * the queue during the walk. */
725 list_for_each_safe(tmp, pos, queue) {
726 struct ldlm_lock *pending;
727 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
729 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
731 if (!ldlm_lock_compat(pending, 1))
734 list_del_init(&pending->l_res_link);
735 ldlm_grant_lock(pending);
/* After the grant the requested and granted modes are equal, so this pair
 * takes and immediately releases a reference in the same mode; the decref
 * gives ldlm_lock_decref() the chance to run a pending blocking callback. */
737 ldlm_lock_addref(pending, pending->l_req_mode);
738 ldlm_lock_decref(pending, pending->l_granted_mode);
/* Send every request queued on @rpc_list (linked through rq_multi).
 *
 * Each request is sent synchronously with ptlrpc_queue_wait(), its status is
 * folded in by ptlrpc_check_status(), and the request is freed.  A failure
 * is logged; the visible code does not propagate it to the caller (the
 * function returns void). */
744 static void ldlm_send_delayed_asts(struct list_head *rpc_list)
746 struct list_head *tmp, *pos;
/* The _safe iterator is used because the current request is freed inside
 * the loop body. */
749 list_for_each_safe(tmp, pos, rpc_list) {
751 struct ptlrpc_request *req =
752 list_entry(tmp, struct ptlrpc_request, rq_multi);
754 CDEBUG(D_INFO, "Sending callback.\n");
756 rc = ptlrpc_queue_wait(req);
757 rc = ptlrpc_check_status(req, rc);
758 ptlrpc_free_req(req);
760 CERROR("Callback send failed: %d\n", rc);
765 /* Must be called with resource->lr_lock not taken. */
/* Re-evaluate the converting and waiting queues of @res and send the ASTs
 * that this produces.
 *
 * Resources in a client namespace are not reprocessed.  Otherwise, under
 * the namespace lock, a list head on this function's stack is installed as
 * res->lr_tmp so that ldlm_grant_lock() and ldlm_send_blocking_ast() can
 * queue their requests on it.  The converting queue is processed first; the
 * waiting queue only if the converting queue is empty afterwards.  The
 * collected requests are sent after the namespace lock is released. */
766 void ldlm_reprocess_all(struct ldlm_resource *res)
768 struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
771 /* Local lock trees don't get reprocessed. */
772 if (res->lr_namespace->ns_client) {
777 l_lock(&res->lr_namespace->ns_lock);
/* NOTE(review): lr_tmp now points at a stack variable; confirm that it is
 * reset before this function returns (no reset is visible here). */
778 res->lr_tmp = &rpc_list;
780 ldlm_reprocess_queue(res, &res->lr_converting);
781 if (list_empty(&res->lr_converting))
782 ldlm_reprocess_queue(res, &res->lr_waiting);
785 l_unlock(&res->lr_namespace->ns_lock);
787 ldlm_send_delayed_asts(&rpc_list);
791 /* Must be called with lock and lock->l_resource unlocked */
/* Cancel @lock: under the namespace lock, log if the lock still has readers
 * or writers, unlink it from its resource and mark it destroyed.
 *
 * NOTE(review): ldlm_lock_destroy() takes the same ns_lock again while it is
 * held here, so l_lock() is presumably re-entrant -- confirm. */
792 void ldlm_lock_cancel(struct ldlm_lock *lock)
794 struct ldlm_resource *res;
795 struct ldlm_namespace *ns;
798 res = lock->l_resource;
799 ns = res->lr_namespace;
801 l_lock(&ns->ns_lock);
/* Remaining references are only logged; the cancel proceeds regardless. */
802 if (lock->l_readers || lock->l_writers)
803 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
804 "writers)\n", lock->l_readers, lock->l_writers);
806 ldlm_resource_unlink_lock(lock);
807 ldlm_lock_destroy(lock);
808 l_unlock(&ns->ns_lock);
811 /* Must be called with lock and lock->l_resource unlocked */
/* Convert the lock identified by @lockh to @new_mode.
 *
 * Under the namespace lock the requested mode is updated and the lock is
 * unlinked from its current resource list.  In a client namespace the lock
 * is either appended to the converting list (when *flags has
 * LDLM_FL_BLOCK_CONV or LDLM_FL_BLOCK_GRANTED) or granted, after which
 * waiters on l_waitq are woken.  The remaining visible path appends the lock
 * to the converting list.
 * The return statement is not visible; the declared return type is a
 * resource pointer. */
812 struct ldlm_resource *ldlm_convert(struct lustre_handle *lockh, int new_mode, int *flags)
814 struct ldlm_lock *lock;
815 struct ldlm_resource *res;
816 struct ldlm_namespace *ns;
819 lock = lustre_handle2object(lockh);
820 res = lock->l_resource;
821 ns = res->lr_namespace;
823 l_lock(&ns->ns_lock);
825 lock->l_req_mode = new_mode;
826 ldlm_resource_unlink_lock(lock);
828 /* If this is a local resource, put it on the appropriate list. */
829 if (res->lr_namespace->ns_client) {
830 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED))
831 ldlm_resource_add_lock(res, res->lr_converting.prev,
834 ldlm_grant_lock(lock);
835 /* FIXME: completion handling not with ns_lock held ! */
836 wake_up(&lock->l_waitq);
/* NOTE(review): this path links the lock with a raw list_add(), whereas the
 * other insertion sites in this file go through ldlm_resource_add_lock() --
 * confirm that skipping that helper is intended. */
839 list_add(&lock->l_res_link, res->lr_converting.prev);
842 l_unlock(&ns->ns_lock);
/* Print a description of @lock at debug level D_OTHER.
 *
 * The function first tests whether D_OTHER is enabled in portal_debug and
 * whether RES_VERSION_SIZE is 4 (the version string below formats exactly
 * four elements).  A "NULL LDLM lock" message exists for a NULL @lock.
 * Otherwise the lock's version, parent, resource, modes and reader/writer
 * counts are printed, plus the extent for LDLM_EXTENT resources.
 * The statements guarded by the first two tests and the declaration of
 * 'ver' are not visible in this listing. */
847 void ldlm_lock_dump(struct ldlm_lock *lock)
851 if (!(portal_debug & D_OTHER))
854 if (RES_VERSION_SIZE != 4)
858 CDEBUG(D_OTHER, " NULL LDLM lock\n");
/* Format the four version words as hex for the header line. */
862 snprintf(ver, sizeof(ver), "%x %x %x %x",
863 lock->l_version[0], lock->l_version[1],
864 lock->l_version[2], lock->l_version[3]);
866 CDEBUG(D_OTHER, " -- Lock dump: %p (%s)\n", lock, ver);
867 CDEBUG(D_OTHER, " Parent: %p\n", lock->l_parent);
868 CDEBUG(D_OTHER, " Resource: %p (%Ld)\n", lock->l_resource,
869 lock->l_resource->lr_name[0]);
870 CDEBUG(D_OTHER, " Requested mode: %d, granted mode: %d\n",
871 (int)lock->l_req_mode, (int)lock->l_granted_mode);
872 CDEBUG(D_OTHER, " Readers: %u ; Writers; %u\n",
873 lock->l_readers, lock->l_writers);
/* The extent is only meaningful for extent resources. */
874 if (lock->l_resource->lr_type == LDLM_EXTENT)
875 CDEBUG(D_OTHER, " Extent: %Lu -> %Lu\n",
876 (unsigned long long)lock->l_extent.start,
877 (unsigned long long)lock->l_extent.end);