1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002 Cluster File Systems, Inc.
5 * Author: Peter Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LDLM
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <linux/random.h>
29 #include <linux/lustre_dlm.h>
30 #include <linux/lustre_mds.h>
/* Printable names for lock modes; debug messages elsewhere in this file index
 * it with a lock mode (ldlm_lockname[mode]).  The initializer entries are not
 * part of this excerpt. */
33 char *ldlm_lockname[] = {
/* Printable names for lock/resource types, keyed with designated initializers
 * on the LDLM_* type constants; the only entry visible here maps
 * LDLM_MDSINTENT to "INT". */
42 char *ldlm_typename[] = {
45 [LDLM_MDSINTENT] "INT"
/* Translate an intent code into a string.  The visible case labels include
 * the combined (IT_OPEN | IT_CREAT) value; an unrecognised code is reported
 * with CERROR (presumably from the default arm -- not visible here).  The
 * strings actually returned are on lines outside this excerpt. */
48 char *ldlm_it2str(int it)
55 case (IT_OPEN | IT_CREAT):
84 CERROR("Unknown intent %d\n", it);
89 extern kmem_cache_t *ldlm_lock_slab;
/* Forward declaration: the table below refers to ldlm_plain_compat before its
 * definition later in the file. */
91 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b);
/* Compatibility callbacks selected by resource type (ldlm_lock_compat_list
 * indexes this table with lr_type).  Plain and MDS-intent resources share
 * ldlm_plain_compat; extent resources use ldlm_extent_compat. */
93 ldlm_res_compat ldlm_res_compat_table[] = {
94 [LDLM_PLAIN] ldlm_plain_compat,
95 [LDLM_EXTENT] ldlm_extent_compat,
96 [LDLM_MDSINTENT] ldlm_plain_compat
/* Enqueue policy callbacks selected by resource type (ldlm_lock_enqueue looks
 * one up with lr_type).  The LDLM_MDSINTENT slot starts out NULL and is
 * filled in and cleared at run time by ldlm_register_intent() and
 * ldlm_unregister_intent(). */
99 ldlm_res_policy ldlm_res_policy_table[] = {
101 [LDLM_EXTENT] ldlm_extent_policy,
102 [LDLM_MDSINTENT] NULL
/* Install `arg` as the policy callback for LDLM_MDSINTENT resources by storing
 * it in ldlm_res_policy_table.  No locking is visible around the store. */
105 void ldlm_register_intent(int (*arg) (struct ldlm_lock * lock, void *req_cookie,
106 ldlm_mode_t mode, void *data))
108 ldlm_res_policy_table[LDLM_MDSINTENT] = arg;
/* Remove the MDS-intent policy callback by resetting its table slot to NULL. */
111 void ldlm_unregister_intent(void)
113 ldlm_res_policy_table[LDLM_MDSINTENT] = NULL;
117 * REFCOUNTED LOCK OBJECTS
122 * Lock refcounts, during creation:
123 * - one special one for allocation, dec'd only once in destroy
124 * - one for being a lock that's in-use
125 * - one for the addref associated with a new lock
/* Take a reference on `lock`.  While holding the namespace lock, a reference
 * is also taken on the lock's resource with ldlm_resource_getref(), so every
 * lock reference is mirrored by a resource reference.
 * NOTE(review): the update of the lock's own refcount and the return
 * statement fall on lines not present in this excerpt. */
127 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
129 l_lock(&lock->l_resource->lr_namespace->ns_lock);
131 ldlm_resource_getref(lock->l_resource);
132 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Drop a reference on `lock`.
 *
 * Visible steps: the matching resource reference is put, a reference on
 * l_parent is put (any guarding condition is not visible here), and once
 * l_refc has reached 0 on a lock already flagged LDLM_FL_DESTROYED the
 * connection reference is released and the object is returned to
 * ldlm_lock_slab.  A negative l_refc is tested for explicitly; the reaction
 * is on a line outside this excerpt. */
136 void ldlm_lock_put(struct ldlm_lock *lock)
/* computed before l_resource is cleared further down */
138 struct lustre_lock *nslock = &lock->l_resource->lr_namespace->ns_lock;
143 //LDLM_DEBUG(lock, "after refc--");
144 if (lock->l_refc < 0)
147 ldlm_resource_put(lock->l_resource);
149 LDLM_LOCK_PUT(lock->l_parent);
151 if (lock->l_refc == 0 && (lock->l_flags & LDLM_FL_DESTROYED)) {
/* NOTE(review): l_resource is NULLed before LDLM_DEBUG(lock, ...) runs;
 * confirm the macro copes with a lock that has no resource. */
152 lock->l_resource = NULL;
153 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
154 if (lock->l_connection)
155 ptlrpc_put_connection(lock->l_connection);
/* NOTE(review): sizeof() is printed with %d here; confirm this is
 * acceptable on targets where size_t is wider than int. */
156 CDEBUG(D_MALLOC, "kfreed 'lock': %d at %p (tot 1).\n",
157 sizeof(*lock), lock);
158 kmem_cache_free(ldlm_lock_slab, lock);
/* Flag `lock` as destroyed.
 *
 * Under the namespace lock three unexpected states are diagnosed, each with a
 * lock dump: children still linked on l_children, outstanding readers or
 * writers, and the lock still being linked on a resource list (l_res_link).
 * If the lock already carries LDLM_FL_DESTROYED the namespace lock is
 * released (and, presumably, the function returns -- the return is not
 * visible here).  Otherwise the destroyed flag is set and the namespace lock
 * released.  Anything done after that is outside this excerpt. */
164 void ldlm_lock_destroy(struct ldlm_lock *lock)
167 l_lock(&lock->l_resource->lr_namespace->ns_lock);
169 if (!list_empty(&lock->l_children)) {
170 LDLM_DEBUG(lock, "still has children (%p)!",
171 lock->l_children.next);
172 ldlm_lock_dump(lock);
175 if (lock->l_readers || lock->l_writers) {
176 LDLM_DEBUG(lock, "lock still has references");
177 ldlm_lock_dump(lock);
181 if (!list_empty(&lock->l_res_link)) {
182 ldlm_lock_dump(lock);
186 if (lock->l_flags & LDLM_FL_DESTROYED) {
187 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* NOTE(review): this is a plain assignment, so every other bit in l_flags
 * (e.g. LDLM_FL_CBPENDING, LDLM_FL_AST_SENT) is discarded; confirm that
 * `|=` was not intended. */
192 lock->l_flags = LDLM_FL_DESTROYED;
193 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
199 usage: pass in a resource on which you have done get
200 pass in a parent lock on which you have done a get
201 do not put the resource or the parent
202 returns: lock with refcount 1
/* Allocate and initialise a lock object attached to `resource`, optionally
 * linked beneath `parent`.
 *
 * The object comes from ldlm_lock_slab and is zero-filled; l_random receives
 * random bytes, the child and resource list heads and the wait queue are
 * initialised, and -- when `parent` is non-NULL -- the new lock is added to
 * the parent's l_children list under the parent's namespace lock.  A NULL
 * `resource` is tested for up front; the reaction is not visible here. */
204 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
205 struct ldlm_resource *resource)
207 struct ldlm_lock *lock;
210 if (resource == NULL)
213 lock = kmem_cache_alloc(ldlm_lock_slab, SLAB_KERNEL);
216 CDEBUG(D_MALLOC, "kmalloced 'lock': %d at "
217 "%p (tot %d).\n", sizeof(*lock), lock, 1);
219 memset(lock, 0, sizeof(*lock));
/* l_random doubles as the handle cookie (see ldlm_lock2handle) */
220 get_random_bytes(&lock->l_random, sizeof(__u64));
222 lock->l_resource = resource;
223 /* this refcount matches the one of the resource passed
224 in which is not being put away */
226 INIT_LIST_HEAD(&lock->l_children);
227 INIT_LIST_HEAD(&lock->l_res_link);
228 init_waitqueue_head(&lock->l_waitq);
230 if (parent != NULL) {
231 l_lock(&parent->l_resource->lr_namespace->ns_lock);
232 lock->l_parent = parent;
233 list_add(&lock->l_childof, &parent->l_children);
234 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
236 /* this is the extra refcount, to prevent the lock
/* Move `lock` onto the resource named `new_resid` within the same namespace.
 *
 * Under the namespace lock: if the new name equals the current resource name
 * the lock is left alone and the namespace lock released.  Otherwise a
 * resource of the same type is obtained with ldlm_resource_get() (last
 * argument 1 -- presumably "create if missing", TODO confirm) and one
 * resource reference per lock reference (l_refc) is moved from the old
 * resource to the new one.  The extra reference returned by
 * ldlm_resource_get() is dropped at the end.  Return values are on lines
 * outside this excerpt. */
242 int ldlm_lock_change_resource(struct ldlm_lock *lock, __u64 new_resid[3])
244 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
245 struct ldlm_resource *oldres = lock->l_resource;
249 l_lock(&ns->ns_lock);
250 if (memcmp(new_resid, lock->l_resource->lr_name,
251 sizeof(lock->l_resource->lr_name)) == 0) {
253 l_unlock(&ns->ns_lock);
257 type = lock->l_resource->lr_type;
/* a zero first name component is singled out; the handling is not visible */
258 if (new_resid[0] == 0)
260 lock->l_resource = ldlm_resource_get(ns, NULL, new_resid, type, 1);
261 if (lock->l_resource == NULL) {
266 /* move references over */
267 for (i = 0; i < lock->l_refc; i++) {
269 ldlm_resource_getref(lock->l_resource);
270 rc = ldlm_resource_put(oldres);
/* rc == 1 presumably means the old resource was freed; flagged when that
 * happens before the final iteration -- TODO confirm against
 * ldlm_resource_put() */
271 if (rc == 1 && i != lock->l_refc - 1)
274 /* compensate for the initial get above.. */
275 ldlm_resource_put(lock->l_resource);
277 l_unlock(&ns->ns_lock);
/* Fill `lockh` so that it identifies `lock`: addr carries the lock's address
 * and cookie its l_random value, which ldlm_handle2lock() compares before
 * trusting the address. */
285 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
287 lockh->addr = (__u64) (unsigned long)lock;
288 lockh->cookie = lock->l_random;
/* Translate a handle back into a lock, taking a reference on success.
 *
 * Checks, in order: a NULL handle or zero addr; an address that
 * kmem_cache_validate() does not accept for ldlm_lock_slab; then, under the
 * namespace lock, a cookie that differs from l_random and a lock flagged
 * LDLM_FL_DESTROYED.  Only when all pass is `retval` set to the lock with an
 * extra reference (LDLM_LOCK_GET); it is initialised to NULL and is
 * presumably what gets returned (the return is not visible here). */
291 struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *handle)
293 struct ldlm_lock *lock = NULL, *retval = NULL;
296 if (!handle || !handle->addr)
299 lock = (struct ldlm_lock *)(unsigned long)(handle->addr);
300 if (!kmem_cache_validate(ldlm_lock_slab, (void *)lock))
/* NOTE(review): l_resource is dereferenced here before the cookie has
 * been checked, so safety rests entirely on the slab validation above. */
303 l_lock(&lock->l_resource->lr_namespace->ns_lock);
304 if (lock->l_random != handle->cookie)
307 if (lock->l_flags & LDLM_FL_DESTROYED)
310 retval = LDLM_LOCK_GET(lock);
313 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Compatibility callback for plain (and MDS-intent) resources: the result is
 * lockmode_compat() applied to the two locks' requested modes. */
320 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b)
322 return lockmode_compat(a->l_req_mode, b->l_req_mode);
/* Fill `desc` with a description of `lock`: its resource (via ldlm_res2desc),
 * requested and granted modes, extent and version array.  Extent and version
 * are copied with sizes taken from the destination fields. */
325 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
327 ldlm_res2desc(lock->l_resource, &desc->l_resource);
328 desc->l_req_mode = lock->l_req_mode;
329 desc->l_granted_mode = lock->l_granted_mode;
330 memcpy(&desc->l_extent, &lock->l_extent, sizeof(desc->l_extent));
331 memcpy(desc->l_version, lock->l_version, sizeof(desc->l_version));
/* Queue an AST work item for `lock` on the list that the resource's lr_tmp
 * pointer currently designates.
 *
 * `new` is the lock that `lock` is blocking (ldlm_lock_compat_list passes
 * it), or NULL when called from ldlm_grant_lock for a completion AST.  With a
 * non-NULL `new`, a lock that already has LDLM_FL_AST_SENT set is singled out
 * first (presumably skipped -- the branch body is not visible).  Callers are
 * expected to have pointed lr_tmp at a list head beforehand, as
 * ldlm_reprocess_all and ldlm_lock_convert do. */
334 static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
335 struct ldlm_lock *new)
337 struct ldlm_ast_work *w;
340 l_lock(&lock->l_resource->lr_namespace->ns_lock);
341 if (new && (lock->l_flags & LDLM_FL_AST_SENT))
344 OBD_ALLOC(w, sizeof(*w));
351 lock->l_flags |= LDLM_FL_AST_SENT;
353 ldlm_lock2desc(new, &w->w_desc);
/* this reference is dropped by ldlm_run_ast_work() after the AST has run */
356 w->w_lock = LDLM_LOCK_GET(lock);
357 list_add(&w->w_list, lock->l_resource->lr_tmp);
359 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Handle-based wrapper: resolve `lockh` to a lock and take a reference of the
 * given mode with ldlm_lock_addref_internal().
 * NOTE(review): no NULL check on the ldlm_handle2lock() result is visible
 * before it is passed on, and ldlm_lock_addref_internal() dereferences it
 * immediately; confirm stale handles cannot reach this path. */
363 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
365 struct ldlm_lock *lock;
367 lock = ldlm_handle2lock(lockh);
368 ldlm_lock_addref_internal(lock, mode);
372 /* only called for local locks */
/* Add a usage reference of the given mode to `lock` under the namespace lock.
 * Modes LCK_NL, LCK_CR and LCK_PR take one branch and every other mode the
 * other (presumably reader vs. writer counts -- the branch bodies are not
 * visible here).  The debug message is emitted after the namespace lock has
 * been released. */
373 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
375 l_lock(&lock->l_resource->lr_namespace->ns_lock);
376 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
380 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
382 LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
385 /* Args: unlocked lock */
/* Release a usage reference of the given mode on the lock named by `lockh`.
 *
 * Mode handling mirrors ldlm_lock_addref_internal (NL/CR/PR versus the rest).
 * If no readers or writers remain and LDLM_FL_CBPENDING is set, the namespace
 * lock is released and the lock's blocking AST is invoked; finding that flag
 * on a lock in a non-client namespace is reported as an error.  Finally two
 * lock references are dropped: the one taken at addref time and the one
 * obtained from ldlm_handle2lock() here. */
386 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
388 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
/* any NULL check on `lock` would be on lines not present in this excerpt */
394 LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
395 l_lock(&lock->l_resource->lr_namespace->ns_lock);
396 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
401 /* If we received a blocked AST and this was the last reference,
402 * run the callback. */
403 if (!lock->l_readers && !lock->l_writers &&
404 (lock->l_flags & LDLM_FL_CBPENDING)) {
405 if (!lock->l_resource->lr_namespace->ns_client) {
406 CERROR("LDLM_FL_CBPENDING set on non-local lock!\n");
410 LDLM_DEBUG(lock, "final decref done on cbpending lock");
/* the namespace lock is dropped before calling out to the AST */
411 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
413 /* FIXME: need a real 'desc' here */
414 lock->l_blocking_ast(lock, NULL, lock->l_data,
417 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
419 LDLM_LOCK_PUT(lock); /* matches the ldlm_lock_get in addref */
420 LDLM_LOCK_PUT(lock); /* matches the handle2lock above */
/* Check `lock` against every lock linked on `queue`.
 *
 * For each entry, the resource type's callback from ldlm_res_compat_table is
 * tried first, then lockmode_compat() on the entry's granted mode versus
 * `lock`'s requested mode; the debug text shows that either test passing
 * moves on to the next entry.  For an entry that fails both, a blocking AST
 * work item is queued when `send_cbs` is set and the entry has a blocking
 * callback.  How the result is accumulated and returned is on lines outside
 * this excerpt. */
425 static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs,
426 struct list_head *queue)
428 struct list_head *tmp, *pos;
/* `tmp` is the cursor; `pos` is the look-ahead slot used by the _safe form */
431 list_for_each_safe(tmp, pos, queue) {
432 struct ldlm_lock *child;
433 ldlm_res_compat compat;
435 child = list_entry(tmp, struct ldlm_lock, l_res_link);
439 compat = ldlm_res_compat_table[child->l_resource->lr_type];
440 if (compat && compat(child, lock)) {
441 CDEBUG(D_OTHER, "compat function succeded, next.\n");
444 if (lockmode_compat(child->l_granted_mode, lock->l_req_mode)) {
445 CDEBUG(D_OTHER, "lock modes are compatible, next.\n");
451 if (send_cbs && child->l_blocking_ast != NULL) {
452 CDEBUG(D_OTHER, "incompatible; sending blocking "
454 ldlm_add_ast_work_item(child, lock);
/* Check `lock` against its resource's granted list and then its converting
 * list, all under the namespace lock.  The second check is an assignment to
 * the same `rc`; whether it is conditional on the first result is decided on
 * a line not present in this excerpt.  `send_cbs` is forwarded to
 * ldlm_lock_compat_list() for both lists. */
461 static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs)
466 l_lock(&lock->l_resource->lr_namespace->ns_lock);
467 rc = ldlm_lock_compat_list(lock, send_cbs,
468 &lock->l_resource->lr_granted);
469 /* FIXME: should we be sending ASTs to converting? */
471 rc = ldlm_lock_compat_list
472 (lock, send_cbs, &lock->l_resource->lr_converting);
474 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
479 - ldlm_handle_enqueue - resource
/* Grant `lock`: under the namespace lock it is added to the resource's
 * granted list, its granted mode is set to its requested mode, the resource's
 * lr_most_restr is lowered if the new granted mode compares smaller, and a
 * completion AST work item is queued when the lock has a completion callback. */
481 void ldlm_grant_lock(struct ldlm_lock *lock)
483 struct ldlm_resource *res = lock->l_resource;
486 l_lock(&lock->l_resource->lr_namespace->ns_lock);
487 ldlm_resource_add_lock(res, &res->lr_granted, lock);
488 lock->l_granted_mode = lock->l_req_mode;
/* NOTE(review): `<` assumes a numeric ordering of mode values that tracks
 * restrictiveness; confirm against the ldlm_mode_t definition. */
490 if (lock->l_granted_mode < res->lr_most_restr)
491 res->lr_most_restr = lock->l_granted_mode;
493 if (lock->l_completion_ast) {
494 ldlm_add_ast_work_item(lock, NULL);
496 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
500 /* returns a referenced lock or NULL */
/* Scan `queue` for a lock that can satisfy a request for `mode` (and, on
 * extent resources, for `extent`).  Three filters are visible: the entry has
 * LDLM_FL_CBPENDING set, its requested mode differs from `mode`, or -- for
 * LDLM_EXTENT resources -- its extent does not fully cover the requested one
 * (presumably each skips the entry; the `continue`s are not visible).  An
 * entry that gets past them receives a usage reference via
 * ldlm_lock_addref_internal(). */
501 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
502 struct ldlm_extent *extent)
504 struct ldlm_lock *lock;
505 struct list_head *tmp;
507 list_for_each(tmp, queue) {
508 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
510 if (lock->l_flags & LDLM_FL_CBPENDING)
513 /* lock_convert() takes the resource lock, so we're sure that
514 * req_mode, lr_type, and l_cookie won't change beneath us */
515 if (lock->l_req_mode != mode)
518 if (lock->l_resource->lr_type == LDLM_EXTENT &&
519 (lock->l_extent.start > extent->start ||
520 lock->l_extent.end < extent->end))
523 ldlm_lock_addref_internal(lock, mode);
530 /* Must be called with no resource or lock locks held.
532 * Returns 1 if it finds an already-existing lock that is compatible; in this
533 * case, lockh is filled in with an addref()ed lock
/* Look for an existing lock on resource (`res_id`, `type`) that matches
 * `mode`.
 *
 * The resource is looked up with ldlm_resource_get() (last argument 0, unlike
 * the 1 used when creating locks -- presumably "do not create", TODO
 * confirm).  Under the namespace lock the granted, converting and waiting
 * queues are searched in that order, with `cookie` passed to search_queue()
 * as the extent.  The resource reference is dropped before unlocking.  On a
 * match, `lockh` is filled in and the lock's completion callback, if any, is
 * called with LDLM_FL_WAIT_NOREPROC.  `cookielen` is not used on any visible
 * line. */
535 int ldlm_lock_match(struct ldlm_namespace *ns, __u64 * res_id, __u32 type,
536 void *cookie, int cookielen, ldlm_mode_t mode,
537 struct lustre_handle *lockh)
539 struct ldlm_resource *res;
540 struct ldlm_lock *lock;
544 res = ldlm_resource_get(ns, NULL, res_id, type, 0);
548 ns = res->lr_namespace;
549 l_lock(&ns->ns_lock);
551 if ((lock = search_queue(&res->lr_granted, mode, cookie)))
553 if ((lock = search_queue(&res->lr_converting, mode, cookie)))
555 if ((lock = search_queue(&res->lr_waiting, mode, cookie)))
560 ldlm_resource_put(res);
561 l_unlock(&ns->ns_lock);
564 ldlm_lock2handle(lock, lockh);
565 if (lock->l_completion_ast)
566 lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
569 LDLM_DEBUG(lock, "matched");
571 LDLM_DEBUG_NOLOCK("not matched");
575 /* Returns a referenced lock */
/* Create a lock requesting `mode` on resource (`res_id`, `type`) in `ns`.
 *
 * `parent_lock_handle` is resolved with ldlm_handle2lock(); when it yields a
 * lock, that lock's resource is passed to ldlm_resource_get() as the parent
 * resource (the guard around the dereference is not visible here).  The lock
 * itself comes from ldlm_lock_new(); the ldlm_resource_put() that follows is
 * presumably the failure path.  The requested mode and data length are then
 * recorded on the lock. */
576 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
577 struct lustre_handle *parent_lock_handle,
578 __u64 * res_id, __u32 type,
579 ldlm_mode_t mode, void *data, __u32 data_len)
581 struct ldlm_resource *res, *parent_res = NULL;
582 struct ldlm_lock *lock, *parent_lock;
584 parent_lock = ldlm_handle2lock(parent_lock_handle);
586 parent_res = parent_lock->l_resource;
588 res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
592 lock = ldlm_lock_new(parent_lock, res);
594 ldlm_resource_put(res);
598 lock->l_req_mode = mode;
600 lock->l_data_len = data_len;
605 /* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */
/* Enqueue `lock` on its resource.
 *
 * Visible steps:
 *  - the blocking callback is recorded and, for LDLM_EXTENT resources, the
 *    extent is copied out of `cookie`;
 *  - on non-client namespaces the resource type's policy callback is run:
 *    ELDLM_LOCK_CHANGED re-reads the lock's resource and sets
 *    LDLM_FL_LOCK_CHANGED in *flags, ELDLM_LOCK_ABORTED destroys the lock;
 *  - the cookie pointer and length are stored on the lock;
 *  - on client ("local") namespaces the lock is unlinked and placed as the
 *    incoming *flags dictate: converting list for LDLM_FL_BLOCK_CONV, waiting
 *    list for BLOCK_WAIT/BLOCK_GRANTED, otherwise (presumably) granted;
 *  - on other namespaces the lock goes to the tail of the waiting list when
 *    the converting list is non-empty (flag BLOCK_CONV), the waiting list is
 *    non-empty (BLOCK_WAIT) or ldlm_lock_compat() fails (BLOCK_GRANTED);
 *    otherwise it is granted;
 *  - the completion callback is stored last.
 * Return values and the jumps between these steps are on lines outside this
 * excerpt. */
606 ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock * lock,
607 void *cookie, int cookie_len,
609 ldlm_completion_callback completion,
610 ldlm_blocking_callback blocking)
612 struct ldlm_resource *res;
614 ldlm_res_policy policy;
617 res = lock->l_resource;
618 lock->l_blocking_ast = blocking;
620 if (res->lr_type == LDLM_EXTENT)
621 memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent));
623 /* policies are not executed on the client */
624 local = res->lr_namespace->ns_client;
625 if (!local && (policy = ldlm_res_policy_table[res->lr_type])) {
627 rc = policy(lock, cookie, lock->l_req_mode, NULL);
629 if (rc == ELDLM_LOCK_CHANGED) {
630 res = lock->l_resource;
631 *flags |= LDLM_FL_LOCK_CHANGED;
632 } else if (rc == ELDLM_LOCK_ABORTED) {
633 ldlm_lock_destroy(lock);
638 lock->l_cookie = cookie;
639 lock->l_cookie_len = cookie_len;
641 if (local && lock->l_req_mode == lock->l_granted_mode) {
642 /* The server returned a blocked lock, but it was granted before
643 * we got a chance to actually enqueue it. We don't need to do
648 /* This distinction between local lock trees is very important; a client
649 * namespace only has information about locks taken by that client, and
650 * thus doesn't have enough information to decide for itself if it can
651 * be granted (below). In this case, we do exactly what the server
652 * tells us to do, as dictated by the 'flags' */
653 ldlm_resource_unlink_lock(lock);
655 if (*flags & LDLM_FL_BLOCK_CONV)
656 ldlm_resource_add_lock(res, res->lr_converting.prev,
658 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
659 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
661 ldlm_grant_lock(lock);
665 /* FIXME: We may want to optimize by checking lr_most_restr */
666 if (!list_empty(&res->lr_converting)) {
667 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
668 *flags |= LDLM_FL_BLOCK_CONV;
671 if (!list_empty(&res->lr_waiting)) {
672 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
673 *flags |= LDLM_FL_BLOCK_WAIT;
/* send_cbs == 0: only test compatibility here, queue no blocking ASTs */
676 if (!ldlm_lock_compat(lock, 0)) {
677 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
678 *flags |= LDLM_FL_BLOCK_GRANTED;
682 ldlm_grant_lock(lock);
685 /* Don't set 'completion_ast' until here so that if the lock is granted
686 * immediately we don't do an unnecessary completion call. */
687 lock->l_completion_ast = completion;
691 /* Must be called with namespace taken: queue is waiting or converting. */
/* Walk `queue` and try to grant each pending lock.  Each entry is tested with
 * ldlm_lock_compat(pending, 1), i.e. with blocking ASTs queued for whatever
 * conflicts; what happens to an entry that is not yet compatible is on a line
 * not present in this excerpt.  A compatible entry is unlinked from the queue
 * and handed to ldlm_grant_lock(). */
692 static int ldlm_reprocess_queue(struct ldlm_resource *res,
693 struct list_head *queue)
695 struct list_head *tmp, *pos;
698 list_for_each_safe(tmp, pos, queue) {
699 struct ldlm_lock *pending;
700 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
702 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
704 if (!ldlm_lock_compat(pending, 1))
/* unlinked with the _init form so l_res_link is an empty list head again
 * before the lock is added to the granted list */
707 list_del_init(&pending->l_res_link);
708 ldlm_grant_lock(pending);
/* Run and free every AST work item on `rpc_list`.  For each item either the
 * lock's blocking callback (given w_desc and w_data) or its completion
 * callback (given w_flags) is invoked -- the test that chooses between them is
 * not visible here.  A failure is only logged.  The lock reference taken when
 * the item was queued is then dropped and the item unlinked and freed. */
714 void ldlm_run_ast_work(struct list_head *rpc_list)
716 struct list_head *tmp, *pos;
720 list_for_each_safe(tmp, pos, rpc_list) {
721 struct ldlm_ast_work *w =
722 list_entry(tmp, struct ldlm_ast_work, w_list);
725 rc = w->w_lock->l_blocking_ast
726 (w->w_lock, &w->w_desc, w->w_data,
729 rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags);
731 CERROR("Failed AST - should clean & disconnect "
733 LDLM_LOCK_PUT(w->w_lock);
734 list_del(&w->w_list);
735 OBD_FREE(w, sizeof(*w));
740 /* Must be called with resource->lr_lock not taken. */
/* Re-evaluate the pending locks of `res`.  Client namespaces are excluded up
 * front.  Under the namespace lock, lr_tmp is pointed at a local list so that
 * AST work items queued during reprocessing are collected there; the
 * converting queue is processed first and the waiting queue only if the
 * converting queue ended up empty.  The collected ASTs are run after the
 * namespace lock has been released. */
741 void ldlm_reprocess_all(struct ldlm_resource *res)
743 struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
746 /* Local lock trees don't get reprocessed. */
747 if (res->lr_namespace->ns_client) {
752 l_lock(&res->lr_namespace->ns_lock);
753 res->lr_tmp = &rpc_list;
755 ldlm_reprocess_queue(res, &res->lr_converting);
756 if (list_empty(&res->lr_converting))
757 ldlm_reprocess_queue(res, &res->lr_waiting);
760 l_unlock(&res->lr_namespace->ns_lock);
762 ldlm_run_ast_work(&rpc_list);
/* Cancel `lock`: under the namespace lock, outstanding readers/writers are
 * only logged (D_INFO), then the lock is unlinked from its resource list and
 * passed to ldlm_lock_destroy().
 * NOTE(review): ldlm_lock_destroy() takes the same ns_lock again, so l_lock
 * has to be recursive for this to work -- confirm. */
766 void ldlm_lock_cancel(struct ldlm_lock *lock)
768 struct ldlm_resource *res;
769 struct ldlm_namespace *ns;
772 res = lock->l_resource;
773 ns = res->lr_namespace;
775 l_lock(&ns->ns_lock);
776 if (lock->l_readers || lock->l_writers)
777 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
778 "writers)\n", lock->l_readers, lock->l_writers);
780 ldlm_resource_unlink_lock(lock);
781 ldlm_lock_destroy(lock);
782 l_unlock(&ns->ns_lock);
/* Convert `lock` to `new_mode`.
 *
 * Under the namespace lock the requested mode is replaced and the lock is
 * unlinked from its current resource list.  On a client namespace the
 * incoming *flags decide: with LDLM_FL_BLOCK_CONV or LDLM_FL_BLOCK_GRANTED
 * the lock goes to the tail of the converting list; the grant + completion
 * callback sequence that follows is presumably the alternative (the `else`
 * is not visible here).  The final list_add_tail() onto the converting list
 * is presumably the non-client path.  AST work collected in the local
 * rpc_list is run after unlocking.  The remaining parameter(s) and the
 * return statement are on lines outside this excerpt. */
786 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
789 struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
790 struct ldlm_resource *res;
791 struct ldlm_namespace *ns;
795 res = lock->l_resource;
796 ns = res->lr_namespace;
798 l_lock(&ns->ns_lock);
800 lock->l_req_mode = new_mode;
801 ldlm_resource_unlink_lock(lock);
803 /* If this is a local resource, put it on the appropriate list. */
804 if (res->lr_namespace->ns_client) {
805 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED))
806 ldlm_resource_add_lock(res, res->lr_converting.prev,
/* ldlm_grant_lock() may queue a completion AST item through lr_tmp */
809 res->lr_tmp = &rpc_list;
810 ldlm_grant_lock(lock);
813 /* FIXME: completion handling not with ns_lock held ! */
814 if (lock->l_completion_ast)
815 lock->l_completion_ast(lock, 0);
/* NOTE(review): linked directly with list_add_tail() here, whereas the
 * other paths go through ldlm_resource_add_lock(); confirm that helper's
 * bookkeeping is not needed on this path. */
818 list_add_tail(&lock->l_res_link, &res->lr_converting);
820 l_unlock(&ns->ns_lock);
823 ldlm_run_ast_work(&rpc_list);
/* Print the state of `lock` at debug level D_OTHER.  Nothing is done unless
 * D_OTHER is enabled in portal_debug.  The version array is formatted as
 * exactly four hex words, which is why RES_VERSION_SIZE is checked against 4
 * first (the reaction to a mismatch is not visible here).  Output covers the
 * peer node or "local", the parent, resource, requested/granted modes,
 * reader/writer counts and, for extent resources, the extent. */
827 void ldlm_lock_dump(struct ldlm_lock *lock)
831 if (!(portal_debug & D_OTHER))
834 if (RES_VERSION_SIZE != 4)
838 CDEBUG(D_OTHER, " NULL LDLM lock\n");
842 snprintf(ver, sizeof(ver), "%x %x %x %x",
843 lock->l_version[0], lock->l_version[1],
844 lock->l_version[2], lock->l_version[3]);
846 CDEBUG(D_OTHER, " -- Lock dump: %p (%s)\n", lock, ver);
847 if (lock->l_connection)
848 CDEBUG(D_OTHER, " Node: NID %x (rhandle: %Lx)\n",
849 lock->l_connection->c_peer.peer_nid,
850 lock->l_remote_handle.addr);
852 CDEBUG(D_OTHER, " Node: local\n");
853 CDEBUG(D_OTHER, " Parent: %p\n", lock->l_parent);
/* NOTE(review): lr_name[0] is passed to %Ld without a cast, unlike the
 * extent values below, which are cast to unsigned long long; confirm the
 * argument type matches the format on every target. */
854 CDEBUG(D_OTHER, " Resource: %p (%Ld)\n", lock->l_resource,
855 lock->l_resource->lr_name[0]);
856 CDEBUG(D_OTHER, " Requested mode: %d, granted mode: %d\n",
857 (int)lock->l_req_mode, (int)lock->l_granted_mode);
858 CDEBUG(D_OTHER, " Readers: %u ; Writers; %u\n",
859 lock->l_readers, lock->l_writers);
860 if (lock->l_resource->lr_type == LDLM_EXTENT)
861 CDEBUG(D_OTHER, " Extent: %Lu -> %Lu\n",
862 (unsigned long long)lock->l_extent.start,
863 (unsigned long long)lock->l_extent.end);