1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002 Cluster File Systems, Inc.
5 * Author: Peter Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LDLM
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <linux/random.h>
29 #include <linux/lustre_dlm.h>
30 #include <linux/lustre_mds.h>
// Table of printable lock-mode names. The debug messages in
// ldlm_lock_addref_internal() and ldlm_lock_decref() index it by lock mode.
// NOTE(review): the initializer entries are not visible in this excerpt.
33 char *ldlm_lockname[] = {
// Table of printable lock-type names, keyed by the LDLM_* type constants
// using GNU-style designated initializers (LDLM_MDSINTENT maps to "INT").
// NOTE(review): the other entries are not visible in this excerpt.
42 char *ldlm_typename[] = {
45 [LDLM_MDSINTENT] "INT"
// Converts an intent value (IT_* bits, e.g. IT_OPEN | IT_CREAT) to a string.
// A value that matches no case is reported through CERROR.
// NOTE(review): the strings returned for each case, and what is returned
// for an unknown intent, are not visible in this excerpt.
48 char *ldlm_it2str(int it)
55 case (IT_OPEN | IT_CREAT):
84 CERROR("Unknown intent %d\n", it);
// Slab cache for struct ldlm_lock objects, defined elsewhere.
// ldlm_lock_new() allocates from it and ldlm_lock_put() frees back to it.
89 extern kmem_cache_t *ldlm_lock_slab;
// Forward declaration so the compat table below can refer to the function.
91 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b);
// Per-resource-type compatibility callbacks; ldlm_lock_compat_list() picks
// the entry by the resource's lr_type. Plain and MDS-intent resources share
// ldlm_plain_compat, extent resources use ldlm_extent_compat.
93 ldlm_res_compat ldlm_res_compat_table[] = {
94 [LDLM_PLAIN] ldlm_plain_compat,
95 [LDLM_EXTENT] ldlm_extent_compat,
96 [LDLM_MDSINTENT] ldlm_plain_compat
// Per-resource-type policy callbacks; ldlm_lock_enqueue() runs the entry for
// the resource's type in non-client namespaces. The MDS-intent slot starts
// out NULL and is set / cleared by ldlm_register_intent() and
// ldlm_unregister_intent().
// NOTE(review): the LDLM_PLAIN entry is not visible in this excerpt.
99 ldlm_res_policy ldlm_res_policy_table[] = {
101 [LDLM_EXTENT] ldlm_extent_policy,
102 [LDLM_MDSINTENT] NULL
// Installs `arg` as the policy callback for LDLM_MDSINTENT resources by
// storing it in ldlm_res_policy_table. No locking is visible around the store.
105 void ldlm_register_intent(int (*arg) (struct ldlm_lock * lock, void *req_cookie,
106 ldlm_mode_t mode, void *data))
108 ldlm_res_policy_table[LDLM_MDSINTENT] = arg;
// Removes the LDLM_MDSINTENT policy callback by resetting its table slot
// to NULL; ldlm_lock_enqueue() skips the policy step for a NULL entry.
111 void ldlm_unregister_intent(void)
113 ldlm_res_policy_table[LDLM_MDSINTENT] = NULL;
117 * REFCOUNTED LOCK OBJECTS
122 * Lock refcounts, during creation:
123 * - one special one for allocation, dec'd only once in destroy
124 * - one for being a lock that's in-use
125 * - one for the addref associated with a new lock
// Takes a reference on `lock`. While holding the namespace lock it also
// takes a reference on the lock's resource (ldlm_resource_getref), which
// ldlm_lock_put() later drops with ldlm_resource_put().
// NOTE(review): the l_refc increment and the return statement are not
// visible in this excerpt; presumably the same lock pointer is returned.
127 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
129 l_lock(&lock->l_resource->lr_namespace->ns_lock);
131 ldlm_resource_getref(lock->l_resource);
132 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
// Drops one reference on `lock`, together with one reference on its
// resource (ldlm_resource_put) and, through LDLM_LOCK_PUT, on its parent.
// When the count is zero and the lock is flagged LDLM_FL_DESTROYED, the
// export's connection (if any) is released and the lock is freed back to
// ldlm_lock_slab.
// NOTE(review): the locking calls, the l_refc decrement and any guard
// around the parent put are not visible in this excerpt; presumably the
// namespace lock (nslock) is held across the body -- confirm.
136 void ldlm_lock_put(struct ldlm_lock *lock)
// The namespace lock pointer is captured up front; l_resource is set to
// NULL further down, before the lock is freed.
138 struct lustre_lock *nslock = &lock->l_resource->lr_namespace->ns_lock;
143 //LDLM_DEBUG(lock, "after refc--");
// A negative count means more puts than gets.
144 if (lock->l_refc < 0)
147 ldlm_resource_put(lock->l_resource);
149 LDLM_LOCK_PUT(lock->l_parent);
// Last reference on a destroyed lock: release everything it holds.
151 if (lock->l_refc == 0 && (lock->l_flags & LDLM_FL_DESTROYED)) {
152 lock->l_resource = NULL;
// NOTE(review): LDLM_DEBUG runs after l_resource was cleared; confirm the
// macro does not dereference lock->l_resource.
153 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
154 if (lock->l_export && lock->l_export->exp_connection)
155 ptlrpc_put_connection(lock->l_export->exp_connection);
// NOTE(review): "%d" is paired with a sizeof() value (size_t).
156 CDEBUG(D_MALLOC, "kfreed 'lock': %d at %p (tot 1).\n",
157 sizeof(*lock), lock);
158 kmem_cache_free(ldlm_lock_slab, lock);
// Marks `lock` as destroyed under the namespace lock. Before doing so it
// dumps the lock (ldlm_lock_dump) when it still has children, still has
// readers or writers, or is still linked on a resource queue.
// NOTE(review): what follows each of those checks, and any code after the
// final unlock, is not visible in this excerpt.
164 void ldlm_lock_destroy(struct ldlm_lock *lock)
167 l_lock(&lock->l_resource->lr_namespace->ns_lock);
169 if (!list_empty(&lock->l_children)) {
170 LDLM_DEBUG(lock, "still has children (%p)!",
171 lock->l_children.next);
172 ldlm_lock_dump(lock);
175 if (lock->l_readers || lock->l_writers) {
176 LDLM_DEBUG(lock, "lock still has references");
177 ldlm_lock_dump(lock);
181 if (!list_empty(&lock->l_res_link)) {
182 ldlm_lock_dump(lock);
// Already destroyed: release the namespace lock (presumably returning
// without further work -- confirm).
186 if (lock->l_flags & LDLM_FL_DESTROYED) {
187 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
// NOTE(review): plain assignment, so every other bit in l_flags is cleared
// here; confirm this is intended rather than OR-ing the flag in.
192 lock->l_flags = LDLM_FL_DESTROYED;
193 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
199 usage: pass in a resource on which you have done get
200 pass in a parent lock on which you have done a get
201 do not put the resource or the parent
202 returns: lock with refcount 1
// Allocates and initializes a lock on `resource`, optionally linked as a
// child of `parent`. The lock is zero-filled, given a random l_random value
// (later used as the handle cookie by ldlm_lock2handle), and has its list
// heads and wait queue initialized.
// NOTE(review): the NULL-resource and allocation-failure exits, the
// refcount setup and the return are not visible in this excerpt.
204 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
205 struct ldlm_resource *resource)
207 struct ldlm_lock *lock;
210 if (resource == NULL)
213 lock = kmem_cache_alloc(ldlm_lock_slab, SLAB_KERNEL);
// NOTE(review): the first "%d" is paired with a sizeof() value (size_t).
216 CDEBUG(D_MALLOC, "kmalloced 'lock': %d at "
217 "%p (tot %d).\n", sizeof(*lock), lock, 1);
219 memset(lock, 0, sizeof(*lock));
220 get_random_bytes(&lock->l_random, sizeof(__u64));
222 lock->l_resource = resource;
223 /* this refcount matches the one of the resource passed
224 in which is not being put away */
226 INIT_LIST_HEAD(&lock->l_children);
227 INIT_LIST_HEAD(&lock->l_res_link);
228 INIT_LIST_HEAD(&lock->l_inode_link);
229 init_waitqueue_head(&lock->l_waitq);
// Link into the parent's child list under the parent's namespace lock.
231 if (parent != NULL) {
232 l_lock(&parent->l_resource->lr_namespace->ns_lock);
233 lock->l_parent = parent;
234 list_add(&lock->l_childof, &parent->l_children);
235 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
237 /* this is the extra refcount, to prevent the lock
// Re-homes `lock` onto the resource named by new_resid within the same
// namespace, moving over the resource references held on the lock's behalf.
// Nothing is moved when new_resid equals the current resource name.
// NOTE(review): the return values, the handling of new_resid[0] == 0 and
// the failure path after ldlm_resource_get() are not visible here.
243 int ldlm_lock_change_resource(struct ldlm_lock *lock, __u64 new_resid[3])
245 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
246 struct ldlm_resource *oldres = lock->l_resource;
250 l_lock(&ns->ns_lock);
// Same name as the current resource: nothing to move.
251 if (memcmp(new_resid, lock->l_resource->lr_name,
252 sizeof(lock->l_resource->lr_name)) == 0) {
254 l_unlock(&ns->ns_lock);
258 type = lock->l_resource->lr_type;
259 if (new_resid[0] == 0)
// Look up the destination resource; the final argument (1) is presumably
// "create if missing" -- confirm against ldlm_resource_get().
261 lock->l_resource = ldlm_resource_get(ns, NULL, new_resid, type, 1);
262 if (lock->l_resource == NULL) {
267 /* move references over */
// One resource reference per lock reference: take it on the new resource,
// drop it on the old one.
268 for (i = 0; i < lock->l_refc; i++) {
270 ldlm_resource_getref(lock->l_resource);
271 rc = ldlm_resource_put(oldres);
// rc == 1 presumably means the old resource was released; that is only
// expected on the last iteration -- confirm.
272 if (rc == 1 && i != lock->l_refc - 1)
275 /* compensate for the initial get above.. */
276 ldlm_resource_put(lock->l_resource);
278 l_unlock(&ns->ns_lock);
// Fills `lockh` for `lock`: addr holds the lock's address widened to 64
// bits, cookie holds the lock's l_random value. ldlm_handle2lock() performs
// the reverse lookup and checks the cookie.
286 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
288 lockh->addr = (__u64) (unsigned long)lock;
289 lockh->cookie = lock->l_random;
// Translates a handle back into a referenced lock. The handle's addr is
// taken as the lock's address and checked against ldlm_lock_slab with
// kmem_cache_validate(); the cookie must equal the lock's l_random and the
// lock must not be flagged LDLM_FL_DESTROYED. On success the result carries
// a reference from LDLM_LOCK_GET; retval starts out NULL.
// NOTE(review): the failure exits and the return statement are not visible
// in this excerpt.
292 struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *handle)
294 struct ldlm_lock *lock = NULL, *retval = NULL;
297 if (!handle || !handle->addr)
300 lock = (struct ldlm_lock *)(unsigned long)(handle->addr);
301 if (!kmem_cache_validate(ldlm_lock_slab, (void *)lock))
// NOTE(review): l_resource is dereferenced before the cookie has been
// compared; only the slab validation above guards this access.
304 l_lock(&lock->l_resource->lr_namespace->ns_lock);
305 if (lock->l_random != handle->cookie)
308 if (lock->l_flags & LDLM_FL_DESTROYED)
311 retval = LDLM_LOCK_GET(lock);
314 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
// Compat callback for plain and MDS-intent resources (see
// ldlm_res_compat_table): returns lockmode_compat() of the two locks'
// requested modes.
321 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b)
323 return lockmode_compat(a->l_req_mode, b->l_req_mode);
// Fills `desc` from `lock`: the resource part via ldlm_res2desc(), then the
// requested and granted modes, the extent and the version array.
326 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
328 ldlm_res2desc(lock->l_resource, &desc->l_resource);
329 desc->l_req_mode = lock->l_req_mode;
330 desc->l_granted_mode = lock->l_granted_mode;
// Copy sizes come from the destination fields.
331 memcpy(&desc->l_extent, &lock->l_extent, sizeof(desc->l_extent));
332 memcpy(desc->l_version, lock->l_version, sizeof(desc->l_version));
// Queues an AST work item for `lock` on the list that the resource's lr_tmp
// points to. ldlm_lock_compat_list() calls this with `new` set to the
// conflicting lock; ldlm_grant_lock() calls it with new == NULL.
// The item holds a lock reference (LDLM_LOCK_GET) that ldlm_run_ast_work()
// drops after running the callback.
335 static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
336 struct ldlm_lock *new)
338 struct ldlm_ast_work *w;
341 l_lock(&lock->l_resource->lr_namespace->ns_lock);
// A lock already flagged LDLM_FL_AST_SENT is presumably not queued again
// for a new conflict (the branch body is not visible here).
342 if (new && (lock->l_flags & LDLM_FL_AST_SENT))
345 OBD_ALLOC(w, sizeof(*w));
// NOTE(review): the conditions guarding the next two statements are not
// visible; `new` is NULL on the grant path.
352 lock->l_flags |= LDLM_FL_AST_SENT;
354 ldlm_lock2desc(new, &w->w_desc);
357 w->w_lock = LDLM_LOCK_GET(lock);
358 list_add(&w->w_list, lock->l_resource->lr_tmp);
360 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
// Handle-based wrapper: resolves lockh to a lock and adds a usage reference
// for `mode` via ldlm_lock_addref_internal().
// NOTE(review): the result of ldlm_handle2lock() is passed on without a
// NULL check, and ldlm_lock_addref_internal() dereferences it right away.
// Whether the handle2lock reference is dropped afterwards is not visible.
364 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
366 struct ldlm_lock *lock;
368 lock = ldlm_handle2lock(lockh);
369 ldlm_lock_addref_internal(lock, mode);
373 /* only called for local locks */
// Adds a usage reference of the given mode to `lock` under the namespace
// lock. LCK_NL, LCK_CR and LCK_PR are handled as one group and every other
// mode as another; presumably these bump l_readers and l_writers
// respectively (the increments are not visible in this excerpt).
// A comment in ldlm_lock_decref() indicates a lock reference is taken here
// as well.
374 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
376 l_lock(&lock->l_resource->lr_namespace->ns_lock);
377 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
381 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
383 LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
386 /* Args: unlocked lock */
// Drops a usage reference of `mode` on the lock named by lockh.
// When no readers or writers remain on a lock flagged LDLM_FL_CBPENDING,
// the namespace lock is released and the lock's blocking AST is called.
// Two lock references are put at the end: one pairing with the addref, one
// with the handle lookup at the top.
// NOTE(review): the handling of a failed handle lookup and the counter
// decrements are not visible in this excerpt.
387 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
389 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
395 LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
396 l_lock(&lock->l_resource->lr_namespace->ns_lock);
397 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
402 /* If we received a blocked AST and this was the last reference,
403 * run the callback. */
404 if (!lock->l_readers && !lock->l_writers &&
405 (lock->l_flags & LDLM_FL_CBPENDING)) {
// CBPENDING is only expected in client namespaces.
406 if (!lock->l_resource->lr_namespace->ns_client) {
407 CERROR("LDLM_FL_CBPENDING set on non-local lock!\n");
411 LDLM_DEBUG(lock, "final decref done on cbpending lock");
// The namespace lock is dropped before calling out to the blocking AST.
412 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
414 /* FIXME: need a real 'desc' here */
415 lock->l_blocking_ast(lock, NULL, lock->l_data,
// Presumably the else branch: no callback pending, just unlock.
418 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
420 LDLM_LOCK_PUT(lock); /* matches the ldlm_lock_get in addref */
421 LDLM_LOCK_PUT(lock); /* matches the handle2lock above */
// Walks `queue` and tests each lock on it against `lock`. An entry is
// passed over when the per-type compat function accepts the pair or when
// its granted mode is compatible with lock's requested mode. For the
// remaining (incompatible) entries a blocking-AST work item is queued when
// send_cbs is set and the entry has a blocking callback.
// NOTE(review): the return value is not visible in this excerpt.
426 static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs,
427 struct list_head *queue)
429 struct list_head *tmp, *pos;
// tmp is the cursor; pos holds the lookahead entry for the safe iterator.
432 list_for_each_safe(tmp, pos, queue) {
433 struct ldlm_lock *child;
434 ldlm_res_compat compat;
436 child = list_entry(tmp, struct ldlm_lock, l_res_link);
// Type-specific check first, then the generic mode check.
440 compat = ldlm_res_compat_table[child->l_resource->lr_type];
441 if (compat && compat(child, lock)) {
442 CDEBUG(D_OTHER, "compat function succeded, next.\n");
445 if (lockmode_compat(child->l_granted_mode, lock->l_req_mode)) {
446 CDEBUG(D_OTHER, "lock modes are compatible, next.\n");
452 if (send_cbs && child->l_blocking_ast != NULL) {
453 CDEBUG(D_OTHER, "incompatible; sending blocking "
455 ldlm_add_ast_work_item(child, lock);
// Checks `lock` against the resource's granted queue and its converting
// queue, under the namespace lock. ldlm_lock_enqueue() and
// ldlm_reprocess_queue() treat a zero result as "cannot be granted yet".
// NOTE(review): the condition between the two checks and the return
// statement are not visible in this excerpt.
462 static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs)
467 l_lock(&lock->l_resource->lr_namespace->ns_lock);
468 rc = ldlm_lock_compat_list(lock, send_cbs,
469 &lock->l_resource->lr_granted);
470 /* FIXME: should we be sending ASTs to converting? */
472 rc = ldlm_lock_compat_list
473 (lock, send_cbs, &lock->l_resource->lr_converting);
475 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
480 - ldlm_handle_enqueue - resource
// Grants `lock`: adds it to the resource's granted list and records the
// requested mode as the granted mode, all under the namespace lock.
// lr_most_restr is updated when the new granted mode compares below it.
// If a completion callback is set, a completion work item is queued
// (ldlm_add_ast_work_item with new == NULL), which uses the resource's
// lr_tmp list.
482 void ldlm_grant_lock(struct ldlm_lock *lock)
484 struct ldlm_resource *res = lock->l_resource;
487 l_lock(&lock->l_resource->lr_namespace->ns_lock);
488 ldlm_resource_add_lock(res, &res->lr_granted, lock);
489 lock->l_granted_mode = lock->l_req_mode;
491 if (lock->l_granted_mode < res->lr_most_restr)
492 res->lr_most_restr = lock->l_granted_mode;
494 if (lock->l_completion_ast) {
495 ldlm_add_ast_work_item(lock, NULL);
497 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
501 /* returns a referenced lock or NULL */
// Scans `queue` for a lock whose requested mode equals `mode` and, for
// extent resources, whose extent covers [extent->start, extent->end].
// Locks flagged LDLM_FL_CBPENDING are passed over. A match receives a usage
// reference through ldlm_lock_addref_internal().
// `extent` is dereferenced only for LDLM_EXTENT resources.
// NOTE(review): the skip statements and the returns are not visible here.
502 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
503 struct ldlm_extent *extent)
505 struct ldlm_lock *lock;
506 struct list_head *tmp;
508 list_for_each(tmp, queue) {
509 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
511 if (lock->l_flags & LDLM_FL_CBPENDING)
514 /* lock_convert() takes the resource lock, so we're sure that
515 * req_mode, lr_type, and l_cookie won't change beneath us */
516 if (lock->l_req_mode != mode)
// Candidate must fully contain the requested range.
519 if (lock->l_resource->lr_type == LDLM_EXTENT &&
520 (lock->l_extent.start > extent->start ||
521 lock->l_extent.end < extent->end))
524 ldlm_lock_addref_internal(lock, mode);
531 /* Must be called with no resource or lock locks held.
533 * Returns 1 if it finds an already-existing lock that is compatible; in this
534 * case, lockh is filled in with an addref()ed lock
// Looks for an existing lock on resource res_id that matches `mode`,
// searching the granted, converting and waiting queues in that order under
// the namespace lock. On a match the handle is filled in and, if the lock
// has a completion callback, it is invoked with LDLM_FL_WAIT_NOREPROC.
// `cookie` is handed to search_queue() as the extent to cover; cookielen is
// not used in the visible lines.
// NOTE(review): the NULL-resource exit and the return are not visible here.
536 int ldlm_lock_match(struct ldlm_namespace *ns, __u64 * res_id, __u32 type,
537 void *cookie, int cookielen, ldlm_mode_t mode,
538 struct lustre_handle *lockh)
540 struct ldlm_resource *res;
541 struct ldlm_lock *lock;
// Final argument 0: presumably "do not create" -- confirm against
// ldlm_resource_get().
545 res = ldlm_resource_get(ns, NULL, res_id, type, 0);
549 ns = res->lr_namespace;
550 l_lock(&ns->ns_lock);
552 if ((lock = search_queue(&res->lr_granted, mode, cookie)))
554 if ((lock = search_queue(&res->lr_converting, mode, cookie)))
556 if ((lock = search_queue(&res->lr_waiting, mode, cookie)))
// ns was captured above, so the unlock does not touch res after the put.
561 ldlm_resource_put(res);
562 l_unlock(&ns->ns_lock);
565 ldlm_lock2handle(lock, lockh);
566 if (lock->l_completion_ast)
567 lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
570 LDLM_DEBUG(lock, "matched");
572 LDLM_DEBUG_NOLOCK("not matched");
576 /* Returns a referenced lock */
// Creates a new lock of `mode` on the resource named res_id in namespace
// `ns`, optionally as a child of the lock named by parent_lock_handle.
// The resource is obtained with ldlm_resource_get() and handed to
// ldlm_lock_new(); the requested mode and data length are then stored.
// NOTE(review): the parent NULL check, the failure exits, the l_data
// assignment and the return are not visible in this excerpt.
577 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
578 struct lustre_handle *parent_lock_handle,
579 __u64 * res_id, __u32 type,
580 ldlm_mode_t mode, void *data, __u32 data_len)
582 struct ldlm_resource *res, *parent_res = NULL;
583 struct ldlm_lock *lock, *parent_lock;
585 parent_lock = ldlm_handle2lock(parent_lock_handle);
587 parent_res = parent_lock->l_resource;
589 res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
593 lock = ldlm_lock_new(parent_lock, res);
// Presumably the failure path: give back the resource reference taken above.
595 ldlm_resource_put(res);
599 lock->l_req_mode = mode;
601 lock->l_data_len = data_len;
606 /* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */
// Enqueues `lock` on its resource.
// Steps visible here: record the blocking callback; for extent resources
// copy `cookie` into l_extent; in non-client namespaces run the policy
// callback for the resource type (ELDLM_LOCK_CHANGED re-reads the resource
// and sets LDLM_FL_LOCK_CHANGED, ELDLM_LOCK_ABORTED destroys the lock);
// store the cookie; then either queue the lock as dictated by *flags or
// decide locally whether it can be granted, setting LDLM_FL_BLOCK_CONV,
// LDLM_FL_BLOCK_WAIT or LDLM_FL_BLOCK_GRANTED in *flags when it must wait.
// The completion callback is stored last.
// NOTE(review): the `flags` parameter declaration, the locking calls, the
// branch structure between the two paths and the returns are not visible
// in this excerpt.
607 ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock * lock,
608 void *cookie, int cookie_len,
610 ldlm_completion_callback completion,
611 ldlm_blocking_callback blocking)
613 struct ldlm_resource *res;
615 ldlm_res_policy policy;
618 res = lock->l_resource;
619 lock->l_blocking_ast = blocking;
// For extent locks the cookie carries the requested extent.
621 if (res->lr_type == LDLM_EXTENT)
622 memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent));
624 /* policies are not executed on the client */
625 local = res->lr_namespace->ns_client;
626 if (!local && (policy = ldlm_res_policy_table[res->lr_type])) {
628 rc = policy(lock, cookie, lock->l_req_mode, NULL);
// The policy may have moved the lock to another resource.
630 if (rc == ELDLM_LOCK_CHANGED) {
631 res = lock->l_resource;
632 *flags |= LDLM_FL_LOCK_CHANGED;
633 } else if (rc == ELDLM_LOCK_ABORTED) {
634 ldlm_lock_destroy(lock);
639 lock->l_cookie = cookie;
640 lock->l_cookie_len = cookie_len;
642 if (local && lock->l_req_mode == lock->l_granted_mode) {
643 /* The server returned a blocked lock, but it was granted before
644 * we got a chance to actually enqueue it. We don't need to do
649 /* This distinction between local lock trees is very important; a client
650 * namespace only has information about locks taken by that client, and
651 * thus doesn't have enough information to decide for itself if it can
652 * be granted (below). In this case, we do exactly what the server
653 * tells us to do, as dictated by the 'flags' */
654 ldlm_resource_unlink_lock(lock);
656 if (*flags & LDLM_FL_BLOCK_CONV)
657 ldlm_resource_add_lock(res, res->lr_converting.prev,
659 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
660 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
662 ldlm_grant_lock(lock);
// Presumably the non-client path from here on: a new lock waits behind any
// converting or waiting locks, or behind incompatible granted locks.
666 /* FIXME: We may want to optimize by checking lr_most_restr */
667 if (!list_empty(&res->lr_converting)) {
668 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
669 *flags |= LDLM_FL_BLOCK_CONV;
672 if (!list_empty(&res->lr_waiting)) {
673 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
674 *flags |= LDLM_FL_BLOCK_WAIT;
// send_cbs == 0: only test compatibility, queue no blocking ASTs.
677 if (!ldlm_lock_compat(lock, 0)) {
678 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
679 *flags |= LDLM_FL_BLOCK_GRANTED;
683 ldlm_grant_lock(lock);
686 /* Don't set 'completion_ast' until here so that if the lock is granted
687 * immediately we don't do an unnecessary completion call. */
688 lock->l_completion_ast = completion;
692 /* Must be called with namespace taken: queue is waiting or converting. */
// Walks `queue` (a resource's converting or waiting list) and grants each
// pending lock that ldlm_lock_compat() now accepts. The compat check is
// made with send_cbs == 1, so blocking-AST work items are queued for the
// locks that still conflict.
// NOTE(review): what happens when a pending lock is still incompatible
// (presumably the walk stops) and the return value are not visible here.
693 static int ldlm_reprocess_queue(struct ldlm_resource *res,
694 struct list_head *queue)
696 struct list_head *tmp, *pos;
699 list_for_each_safe(tmp, pos, queue) {
700 struct ldlm_lock *pending;
701 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
703 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
705 if (!ldlm_lock_compat(pending, 1))
// Compatible now: take it off this queue and grant it.
708 list_del_init(&pending->l_res_link);
709 ldlm_grant_lock(pending);
// Runs and frees every work item on rpc_list. Each item results in a call
// to its lock's blocking AST (with the stored descriptor) or completion AST
// (with the stored flags). Afterwards the lock reference taken when the
// item was queued is dropped, and the item is unlinked and freed.
// NOTE(review): the condition choosing between the two callbacks and the
// test guarding the CERROR are not visible in this excerpt.
715 void ldlm_run_ast_work(struct list_head *rpc_list)
717 struct list_head *tmp, *pos;
721 list_for_each_safe(tmp, pos, rpc_list) {
722 struct ldlm_ast_work *w =
723 list_entry(tmp, struct ldlm_ast_work, w_list);
726 rc = w->w_lock->l_blocking_ast
727 (w->w_lock, &w->w_desc, w->w_data,
730 rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags);
732 CERROR("Failed AST - should clean & disconnect "
734 LDLM_LOCK_PUT(w->w_lock);
735 list_del(&w->w_list);
736 OBD_FREE(w, sizeof(*w));
741 /* Must be called with resource->lr_lock not taken. */
// Re-evaluates the pending locks of `res`. Under the namespace lock, lr_tmp
// is pointed at a local list, the converting queue is reprocessed, and the
// waiting queue is reprocessed only if the converting queue ended up empty.
// The collected AST work items are run after the namespace lock is dropped.
// NOTE(review): the body of the ns_client branch (presumably an early
// return) and any reset of lr_tmp are not visible in this excerpt.
742 void ldlm_reprocess_all(struct ldlm_resource *res)
744 struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
747 /* Local lock trees don't get reprocessed. */
748 if (res->lr_namespace->ns_client) {
753 l_lock(&res->lr_namespace->ns_lock);
// Work items queued during reprocessing land on rpc_list via lr_tmp.
754 res->lr_tmp = &rpc_list;
756 ldlm_reprocess_queue(res, &res->lr_converting);
757 if (list_empty(&res->lr_converting))
758 ldlm_reprocess_queue(res, &res->lr_waiting);
761 l_unlock(&res->lr_namespace->ns_lock);
763 ldlm_run_ast_work(&rpc_list);
// Cancels `lock`: under the namespace lock it is unlinked from its resource
// queue and then destroyed with ldlm_lock_destroy(). Remaining readers or
// writers are only logged (CDEBUG), they do not stop the cancel in the
// visible code.
767 void ldlm_lock_cancel(struct ldlm_lock *lock)
769 struct ldlm_resource *res;
770 struct ldlm_namespace *ns;
773 res = lock->l_resource;
774 ns = res->lr_namespace;
776 l_lock(&ns->ns_lock);
777 if (lock->l_readers || lock->l_writers)
778 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
779 "writers)\n", lock->l_readers, lock->l_writers);
// Unlink first: ldlm_lock_destroy() dumps locks still on a resource queue.
781 ldlm_resource_unlink_lock(lock);
782 ldlm_lock_destroy(lock);
783 l_unlock(&ns->ns_lock);
// Converts `lock` to new_mode. Under the namespace lock the requested mode
// is updated and the lock is unlinked from its current queue. In a client
// namespace the lock is put on the converting list when *flags says it is
// blocked, and is otherwise granted, followed by a direct call to its
// completion callback. In other namespaces the lock is appended to the
// converting list. Queued AST work is run after the namespace lock is
// released.
// NOTE(review): the rest of the parameter list (including `flags`), the
// else branches and the return are not visible in this excerpt.
787 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
790 struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
791 struct ldlm_resource *res;
792 struct ldlm_namespace *ns;
796 res = lock->l_resource;
797 ns = res->lr_namespace;
799 l_lock(&ns->ns_lock);
801 lock->l_req_mode = new_mode;
802 ldlm_resource_unlink_lock(lock);
804 /* If this is a local resource, put it on the appropriate list. */
805 if (res->lr_namespace->ns_client) {
806 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED))
807 ldlm_resource_add_lock(res, res->lr_converting.prev,
// Granting may queue a completion work item, so lr_tmp must point at a
// valid list first.
810 res->lr_tmp = &rpc_list;
811 ldlm_grant_lock(lock);
814 /* FIXME: completion handling not with ns_lock held ! */
815 if (lock->l_completion_ast)
816 lock->l_completion_ast(lock, 0);
819 list_add_tail(&lock->l_res_link, &res->lr_converting);
821 l_unlock(&ns->ns_lock);
824 ldlm_run_ast_work(&rpc_list);
// Debug helper: prints the fields of `lock` through CDEBUG(D_OTHER, ...).
// It first tests whether D_OTHER is set in portal_debug. The version string
// is built from exactly four l_version words, which is what the
// RES_VERSION_SIZE != 4 check relates to.
// NOTE(review): the bodies of the early checks (presumably returns) and the
// declaration of `ver` are not visible in this excerpt.
828 void ldlm_lock_dump(struct ldlm_lock *lock)
832 if (!(portal_debug & D_OTHER))
835 if (RES_VERSION_SIZE != 4)
839 CDEBUG(D_OTHER, " NULL LDLM lock\n");
843 snprintf(ver, sizeof(ver), "%x %x %x %x",
844 lock->l_version[0], lock->l_version[1],
845 lock->l_version[2], lock->l_version[3]);
847 CDEBUG(D_OTHER, " -- Lock dump: %p (%s)\n", lock, ver);
// A lock with an export and connection is reported with its peer NID and
// remote handle; otherwise it is reported as local.
848 if (lock->l_export && lock->l_export->exp_connection)
849 CDEBUG(D_OTHER, " Node: NID %x (rhandle: %Lx)\n",
850 lock->l_export->exp_connection->c_peer.peer_nid,
851 lock->l_remote_handle.addr);
853 CDEBUG(D_OTHER, " Node: local\n");
854 CDEBUG(D_OTHER, " Parent: %p\n", lock->l_parent);
// NOTE(review): l_resource is dereferenced here without a NULL check.
855 CDEBUG(D_OTHER, " Resource: %p (%Ld)\n", lock->l_resource,
856 lock->l_resource->lr_name[0]);
857 CDEBUG(D_OTHER, " Requested mode: %d, granted mode: %d\n",
858 (int)lock->l_req_mode, (int)lock->l_granted_mode);
859 CDEBUG(D_OTHER, " Readers: %u ; Writers; %u\n",
860 lock->l_readers, lock->l_writers);
861 if (lock->l_resource->lr_type == LDLM_EXTENT)
862 CDEBUG(D_OTHER, " Extent: %Lu -> %Lu\n",
863 (unsigned long long)lock->l_extent.start,
864 (unsigned long long)lock->l_extent.end);