1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * by Cluster File Systems, Inc.
10 * authors, Peter Braam <braam@clusterfs.com> &
11 * Phil Schwan <phil@clusterfs.com>
14 #define DEBUG_SUBSYSTEM S_LDLM
16 #include <linux/slab.h>
17 #include <linux/module.h>
18 #include <linux/random.h>
19 #include <linux/lustre_dlm.h>
20 #include <linux/lustre_mds.h>
/* Printable lock-mode names; debug messages in this file index this array by
 * mode (ldlm_lockname[mode]). */
23 char *ldlm_lockname[] = {
/* Printable names selected by the LDLM_* type constants through designated
 * initializers, e.g. LDLM_MDSINTENT is rendered as "INT". */
32 char *ldlm_typename[] = {
35 [LDLM_MDSINTENT] "INT"
/* Return a string for the intent value `it`.  The combined
 * (IT_OPEN | IT_CREAT) value has its own case; a value that matches no case
 * is reported through CERROR. */
38 char *ldlm_it2str(int it)
45 case (IT_OPEN | IT_CREAT):
74 CERROR("Unknown intent %d\n", it);
79 extern kmem_cache_t *ldlm_lock_slab;
/* Forward declaration: ldlm_plain_compat is defined further down in this file
 * but is already needed by the table below. */
81 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b);
/* Compatibility callbacks selected by resource type (lr_type); consulted in
 * ldlm_lock_compat_list().  Plain and MDS-intent resources share
 * ldlm_plain_compat. */
83 ldlm_res_compat ldlm_res_compat_table[] = {
84 [LDLM_PLAIN] ldlm_plain_compat,
85 [LDLM_EXTENT] ldlm_extent_compat,
86 [LDLM_MDSINTENT] ldlm_plain_compat
/* Policy callbacks selected by resource type; consulted in
 * ldlm_lock_enqueue() for non-client namespaces.  The LDLM_MDSINTENT slot is
 * assigned at run time by ldlm_register_intent() and cleared again by
 * ldlm_unregister_intent(). */
89 ldlm_res_policy ldlm_res_policy_table[] = {
91 [LDLM_EXTENT] ldlm_extent_policy,
/* Install `arg` as the policy callback used for LDLM_MDSINTENT resources.
 * Any previously installed callback is simply overwritten. */
95 void ldlm_register_intent(int (*arg) (struct ldlm_lock * lock, void *req_cookie,
96 ldlm_mode_t mode, void *data))
98 ldlm_res_policy_table[LDLM_MDSINTENT] = arg;
/* Remove the LDLM_MDSINTENT policy callback by resetting its table slot to
 * NULL. */
101 void ldlm_unregister_intent()
103 ldlm_res_policy_table[LDLM_MDSINTENT] = NULL;
107 * REFCOUNTED LOCK OBJECTS
112 * Lock refcounts, during creation:
113 * - one special one for allocation, dec'd only once in destroy
114 * - one for being a lock that's in-use
115 * - one for the addref associated with a new lock
/* Take a reference on `lock`.  While holding the namespace lock, a reference
 * is also taken on the lock's resource, so the resource carries one reference
 * per lock reference (ldlm_lock_put() releases it again).
 * NOTE(review): the update of the lock's own refcount and the return
 * statement are not visible in this excerpt - confirm against the full
 * source. */
117 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
119 l_lock(&lock->l_resource->lr_namespace->ns_lock);
121 ldlm_resource_getref(lock->l_resource);
122 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Drop one reference on `lock`.  A put also releases a reference on the
 * lock's resource and on its parent lock.  Once l_refc has reached zero on a
 * lock that is already marked LDLM_FL_DESTROYED, the connection reference (if
 * any) is released and the lock is handed back to ldlm_lock_slab. */
126 void ldlm_lock_put(struct ldlm_lock *lock)
/* Remember the namespace lock now: l_resource is set to NULL further down, so
 * it can no longer be reached through the lock after that point. */
128 struct lustre_lock *nslock = &lock->l_resource->lr_namespace->ns_lock;
133 LDLM_DEBUG(lock, "after refc--");
/* A negative count would mean more puts than gets. */
134 if (lock->l_refc < 0)
137 ldlm_resource_put(lock->l_resource);
139 LDLM_LOCK_PUT(lock->l_parent);
/* Final reference on a destroyed lock: tear the object down. */
141 if (lock->l_refc == 0 && (lock->l_flags & LDLM_FL_DESTROYED)) {
142 lock->l_resource = NULL;
143 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
144 if (lock->l_connection)
145 ptlrpc_put_connection(lock->l_connection);
/* NOTE(review): sizeof() yields a size_t but is printed with %d - confirm
 * this is acceptable on every target. */
146 CDEBUG(D_MALLOC, "kfreed 'lock': %d at %p (tot 1).\n",
147 sizeof(*lock), lock);
148 kmem_cache_free(ldlm_lock_slab, lock);
/* Mark `lock` as destroyed.  Everything happens under the namespace lock.
 * Before the flag is set, the lock is dumped for diagnosis if it still has
 * children, still has readers or writers, or is still linked on a resource
 * list.  The memory itself is released later, by the final ldlm_lock_put(). */
154 void ldlm_lock_destroy(struct ldlm_lock *lock)
157 l_lock(&lock->l_resource->lr_namespace->ns_lock);
159 if (!list_empty(&lock->l_children)) {
160 LDLM_DEBUG(lock, "still has children (%p)!",
161 lock->l_children.next);
162 ldlm_lock_dump(lock);
165 if (lock->l_readers || lock->l_writers) {
166 LDLM_DEBUG(lock, "lock still has references");
167 ldlm_lock_dump(lock);
171 if (!list_empty(&lock->l_res_link)) {
172 ldlm_lock_dump(lock);
/* Already destroyed: release the namespace lock (presumably followed by an
 * early return - the rest of this branch is not visible here). */
176 if (lock->l_flags & LDLM_FL_DESTROYED) {
177 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* NOTE(review): this is a plain assignment, so every other bit in l_flags is
 * cleared - confirm whether "|=" was intended. */
182 lock->l_flags = LDLM_FL_DESTROYED;
183 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
189 usage: pass in a resource on which you have done get
190 pass in a parent lock on which you have done a get
191 do not put the resource or the parent
192 returns: lock with refcount 1
/* Allocate a lock object from ldlm_lock_slab, zero it, give it a random
 * cookie and attach it to `resource`.  When `parent` is non-NULL the new lock
 * is also linked onto the parent's list of children. */
194 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
195 struct ldlm_resource *resource)
197 struct ldlm_lock *lock;
/* A lock cannot exist without a resource. */
200 if (resource == NULL)
203 lock = kmem_cache_alloc(ldlm_lock_slab, SLAB_KERNEL);
/* NOTE(review): sizeof() yields a size_t but is printed with %d - confirm. */
206 CDEBUG(D_MALLOC, "kmalloced 'lock': %d at "
207 "%p (tot %d).\n", sizeof(*lock), lock, 1);
209 memset(lock, 0, sizeof(*lock));
/* l_random is what ldlm_lock2handle() exports as the handle cookie and what
 * ldlm_handle2lock() compares against. */
210 get_random_bytes(&lock->l_random, sizeof(__u64));
212 lock->l_resource = resource;
213 /* this refcount matches the one of the resource passed
214 in which is not being put away */
216 INIT_LIST_HEAD(&lock->l_children);
217 INIT_LIST_HEAD(&lock->l_res_link);
218 init_waitqueue_head(&lock->l_waitq);
/* Link into the parent's child list under the parent's namespace lock. */
220 if (parent != NULL) {
221 l_lock(&parent->l_resource->lr_namespace->ns_lock);
222 lock->l_parent = parent;
223 list_add(&lock->l_childof, &parent->l_children);
224 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
226 /* this is the extra refcount, to prevent the lock
/* Move `lock` onto the resource named by `new_resid`, within the same
 * namespace and with the same resource type.  If the name equals that of the
 * current resource, the namespace lock is simply released again.  Otherwise
 * the target resource is obtained through ldlm_resource_get() and one
 * resource reference per lock reference is moved from the old resource to
 * the new one. */
232 int ldlm_lock_change_resource(struct ldlm_lock *lock, __u64 new_resid[3])
234 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
235 struct ldlm_resource *oldres = lock->l_resource;
239 l_lock(&ns->ns_lock);
/* Same name as the current resource: nothing to move. */
240 if (memcmp(new_resid, lock->l_resource->lr_name,
241 sizeof(lock->l_resource->lr_name)) == 0) {
243 l_unlock(&ns->ns_lock);
247 type = lock->l_resource->lr_type;
/* A zero first word in the new name is singled out here; the handling itself
 * is not visible in this excerpt. */
248 if (new_resid[0] == 0)
250 lock->l_resource = ldlm_resource_get(ns, NULL, new_resid, type, 1);
251 if (lock->l_resource == NULL) {
256 /* move references over */
257 for (i = 0; i < lock->l_refc; i++) {
259 ldlm_resource_getref(lock->l_resource);
260 rc = ldlm_resource_put(oldres);
/* rc == 1 on anything but the last iteration is singled out; presumably
 * ldlm_resource_put() returns 1 once the resource is gone - TODO confirm. */
261 if (rc == 1 && i != lock->l_refc - 1)
264 /* compensate for the initial get above.. */
265 ldlm_resource_put(lock->l_resource);
267 l_unlock(&ns->ns_lock);
/* Fill in `lockh` so that it identifies `lock`: addr carries the lock's
 * address and cookie carries the lock's random value, which
 * ldlm_handle2lock() later checks before trusting the address. */
275 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
277 lockh->addr = (__u64) (unsigned long)lock;
278 lockh->cookie = lock->l_random;
/* Resolve `handle` back to its lock and take a reference on it
 * (LDLM_LOCK_GET).  The result starts out as NULL; the handle is checked for
 * being NULL or having a zero address, for pointing at a valid object of
 * ldlm_lock_slab, for a cookie equal to the lock's l_random, and for the lock
 * not being LDLM_FL_DESTROYED.  Presumably any failed check leaves the NULL
 * result in place - the branch bodies are not visible in this excerpt. */
281 struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *handle)
283 struct ldlm_lock *lock = NULL, *retval = NULL;
286 if (!handle || !handle->addr)
289 lock = (struct ldlm_lock *)(unsigned long)(handle->addr);
/* The address comes from outside, so validate it against the slab before
 * dereferencing it. */
290 if (!kmem_cache_validate(ldlm_lock_slab, (void *)lock))
293 l_lock(&lock->l_resource->lr_namespace->ns_lock);
294 if (lock->l_random != handle->cookie)
297 if (lock->l_flags & LDLM_FL_DESTROYED)
300 retval = LDLM_LOCK_GET(lock);
303 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Compatibility callback for plain (and MDS-intent) resources: the result is
 * whatever lockmode_compat() reports for the requested modes of `a` and
 * `b`. */
310 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b)
312 return lockmode_compat(a->l_req_mode, b->l_req_mode);
/* Fill `desc` from `lock`: the resource description (via ldlm_res2desc), the
 * requested and granted modes, the extent and the version array. */
315 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
317 ldlm_res2desc(lock->l_resource, &desc->l_resource);
318 desc->l_req_mode = lock->l_req_mode;
319 desc->l_granted_mode = lock->l_granted_mode;
/* Copy sizes are taken from the destination fields. */
320 memcpy(&desc->l_extent, &lock->l_extent, sizeof(desc->l_extent));
321 memcpy(desc->l_version, lock->l_version, sizeof(desc->l_version));
/* Queue an AST work item for `lock` on the list that the resource's lr_tmp
 * points at; ldlm_run_ast_work() later walks that list.  Callers in this
 * file pass the conflicting lock as `new` when a blocking AST is wanted
 * (ldlm_lock_compat_list) and NULL when a completion AST is wanted
 * (ldlm_grant_lock). */
324 static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
325 struct ldlm_lock *new)
327 struct ldlm_ast_work *w;
330 l_lock(&lock->l_resource->lr_namespace->ns_lock);
/* Blocking case with LDLM_FL_AST_SENT already set: presumably nothing more is
 * queued (the branch body is not visible here). */
331 if (new && (lock->l_flags & LDLM_FL_AST_SENT))
334 OBD_ALLOC(w, sizeof(*w));
341 lock->l_flags |= LDLM_FL_AST_SENT;
343 ldlm_lock2desc(new, &w->w_desc);
/* The work item holds its own lock reference; ldlm_run_ast_work() drops it
 * after the callback has run. */
346 w->w_lock = LDLM_LOCK_GET(lock);
347 list_add(&w->w_list, lock->l_resource->lr_tmp);
349 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
/* Handle-based wrapper: resolve `lockh` to its lock and add a reference of
 * `mode` through ldlm_lock_addref_internal().
 * NOTE(review): the result of ldlm_handle2lock() is handed on without a NULL
 * check, and that function starts from a NULL result - confirm that a stale
 * or invalid handle cannot reach this point. */
353 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
355 struct ldlm_lock *lock;
357 lock = ldlm_handle2lock(lockh);
358 ldlm_lock_addref_internal(lock, mode);
362 /* only called for local locks */
/* Add a usage reference of `mode` to `lock` under the namespace lock.  The
 * modes LCK_NL, LCK_CR and LCK_PR are treated as one group and every other
 * mode as another; presumably these feed l_readers and l_writers
 * respectively (the counter updates are not visible in this excerpt). */
363 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
365 l_lock(&lock->l_resource->lr_namespace->ns_lock);
366 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
370 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
372 LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
375 /* Args: unlocked lock */
/* Drop a usage reference of `mode` on the lock behind `lockh`.  If the lock
 * is then left with neither readers nor writers and LDLM_FL_CBPENDING is
 * set, the namespace lock is released and the lock's blocking AST is called;
 * otherwise the namespace lock is just released.  Two lock references are
 * dropped at the end, as explained on the lines themselves. */
376 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
378 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
384 LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
385 l_lock(&lock->l_resource->lr_namespace->ns_lock);
/* Same mode grouping as in ldlm_lock_addref_internal(). */
386 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
391 /* If we received a blocked AST and this was the last reference,
392 * run the callback. */
393 if (!lock->l_readers && !lock->l_writers &&
394 (lock->l_flags & LDLM_FL_CBPENDING)) {
/* CBPENDING is only expected in a client namespace. */
395 if (!lock->l_resource->lr_namespace->ns_client) {
396 CERROR("LDLM_FL_CBPENDING set on non-local lock!\n");
400 LDLM_DEBUG(lock, "final decref done on cbpending lock");
/* The namespace lock is dropped before the callback is invoked. */
401 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
403 /* FIXME: need a real 'desc' here */
404 lock->l_blocking_ast(lock, NULL, lock->l_data,
407 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
409 LDLM_LOCK_PUT(lock); /* matches the ldlm_lock_get in addref */
410 LDLM_LOCK_PUT(lock); /* matches the handle2lock above */
/* Check `lock` against every lock linked on `queue`.  An entry counts as
 * compatible when the type-specific callback from ldlm_res_compat_table
 * accepts the pair, or when its granted mode is compatible with the
 * requested mode of `lock`.  For an entry that is neither, a blocking AST
 * work item is queued if `send_cbs` is set and the entry has a blocking
 * callback.  How the result is accumulated and returned is not visible in
 * this excerpt. */
415 static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs,
416 struct list_head *queue)
418 struct list_head *tmp, *pos;
/* `tmp` is the cursor here; `pos` only holds the saved next node. */
421 list_for_each_safe(tmp, pos, queue) {
422 struct ldlm_lock *child;
423 ldlm_res_compat compat;
425 child = list_entry(tmp, struct ldlm_lock, l_res_link);
429 compat = ldlm_res_compat_table[child->l_resource->lr_type];
430 if (compat && compat(child, lock)) {
431 CDEBUG(D_OTHER, "compat function succeded, next.\n");
434 if (lockmode_compat(child->l_granted_mode, lock->l_req_mode)) {
435 CDEBUG(D_OTHER, "lock modes are compatible, next.\n");
/* Incompatible entry: ask its holder to give the lock up. */
441 if (send_cbs && child->l_blocking_ast != NULL) {
442 CDEBUG(D_OTHER, "incompatible; sending blocking "
444 ldlm_add_ast_work_item(child, lock);
/* Check `lock` against the granted list and the converting list of its
 * resource, under the namespace lock.  `send_cbs` is forwarded to
 * ldlm_lock_compat_list() for both lists.
 * NOTE(review): whether the second check depends on the outcome of the first
 * is not visible in this excerpt - confirm. */
451 static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs)
456 l_lock(&lock->l_resource->lr_namespace->ns_lock);
457 rc = ldlm_lock_compat_list(lock, send_cbs,
458 &lock->l_resource->lr_granted);
459 /* FIXME: should we be sending ASTs to converting? */
461 rc = ldlm_lock_compat_list
462 (lock, send_cbs, &lock->l_resource->lr_converting);
464 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
469 - ldlm_handle_enqueue - resource
/* Grant `lock`: under the namespace lock, add it to the resource's granted
 * list, make the granted mode equal to the requested mode, lower the
 * resource's lr_most_restr if the new granted mode is smaller, and queue a
 * completion AST work item if the lock has a completion callback. */
471 void ldlm_grant_lock(struct ldlm_lock *lock)
473 struct ldlm_resource *res = lock->l_resource;
476 l_lock(&lock->l_resource->lr_namespace->ns_lock);
477 ldlm_resource_add_lock(res, &res->lr_granted, lock);
478 lock->l_granted_mode = lock->l_req_mode;
480 if (lock->l_granted_mode < res->lr_most_restr)
481 res->lr_most_restr = lock->l_granted_mode;
/* A NULL second argument requests a completion (not blocking) work item.
 * The item goes onto the list lr_tmp points at, so the caller must have set
 * lr_tmp up beforehand. */
483 if (lock->l_completion_ast) {
484 ldlm_add_ast_work_item(lock, NULL);
486 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
490 /* returns a referenced lock or NULL */
/* Walk `queue` looking for a lock that can satisfy a request for `mode` (and,
 * on extent resources, for `extent`).  The checks below single out locks
 * that have LDLM_FL_CBPENDING set, whose requested mode differs from `mode`,
 * or whose extent does not cover the whole requested range; presumably such
 * entries are skipped (the branch bodies are not visible here).  A reference
 * of `mode` is added to the lock that passes. */
491 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
492 struct ldlm_extent *extent)
494 struct ldlm_lock *lock;
495 struct list_head *tmp;
497 list_for_each(tmp, queue) {
498 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
500 if (lock->l_flags & LDLM_FL_CBPENDING)
503 /* lock_convert() takes the resource lock, so we're sure that
504 * req_mode, lr_type, and l_cookie won't change beneath us */
505 if (lock->l_req_mode != mode)
/* Extent resources only: true when the candidate starts after, or ends
 * before, the requested range. */
508 if (lock->l_resource->lr_type == LDLM_EXTENT &&
509 (lock->l_extent.start > extent->start ||
510 lock->l_extent.end < extent->end))
513 ldlm_lock_addref_internal(lock, mode);
520 /* Must be called with no resource or lock locks held.
522 * Returns 1 if it finds an already-existing lock that is compatible; in this
523 * case, lockh is filled in with an addref()ed lock
525 int ldlm_lock_match(struct ldlm_namespace *ns, __u64 * res_id, __u32 type,
526 void *cookie, int cookielen, ldlm_mode_t mode,
527 struct lustre_handle *lockh)
529 struct ldlm_resource *res;
530 struct ldlm_lock *lock;
/* Look the resource up.  The last argument is 0 here, whereas
 * ldlm_lock_create() passes 1; presumably 0 means "do not create" -
 * TODO confirm. */
534 res = ldlm_resource_get(ns, NULL, res_id, type, 0);
538 ns = res->lr_namespace;
539 l_lock(&ns->ns_lock);
/* Search the granted, converting and waiting lists, in that order.
 * NOTE(review): `cookie` is a void pointer that search_queue() receives as
 * its struct ldlm_extent pointer - confirm that callers always pass an
 * extent for LDLM_EXTENT resources. */
541 if ((lock = search_queue(&res->lr_granted, mode, cookie)))
543 if ((lock = search_queue(&res->lr_converting, mode, cookie)))
545 if ((lock = search_queue(&res->lr_waiting, mode, cookie)))
/* Release the reference taken by ldlm_resource_get() above. */
550 ldlm_resource_put(res);
551 l_unlock(&ns->ns_lock);
/* Export the matched lock as a handle and, if the lock has a completion
 * callback, invoke it with LDLM_FL_WAIT_NOREPROC. */
554 ldlm_lock2handle(lock, lockh);
555 if (lock->l_completion_ast)
556 lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
559 LDLM_DEBUG(lock, "matched");
561 LDLM_DEBUG_NOLOCK("not matched");
565 /* Returns a referenced lock */
/* Create a lock of `mode` on the resource named by `res_id` in namespace
 * `ns`.  The parent handle is resolved first and its resource is passed to
 * ldlm_resource_get() as the parent resource; the lock object itself comes
 * from ldlm_lock_new(), and the requested mode and data length are then
 * recorded on it. */
566 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
567 struct lustre_handle *parent_lock_handle,
568 __u64 * res_id, __u32 type,
569 ldlm_mode_t mode, void *data, __u32 data_len)
571 struct ldlm_resource *res, *parent_res = NULL;
572 struct ldlm_lock *lock, *parent_lock;
/* ldlm_handle2lock() starts from a NULL result, so parent_lock may be NULL;
 * parent_res keeps its NULL initialiser unless it is assigned below. */
574 parent_lock = ldlm_handle2lock(parent_lock_handle);
576 parent_res = parent_lock->l_resource;
578 res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
582 lock = ldlm_lock_new(parent_lock, res);
/* Presumably the failure path: ldlm_lock_new() otherwise keeps the resource
 * reference - TODO confirm the guarding condition. */
584 ldlm_resource_put(res);
588 lock->l_req_mode = mode;
590 lock->l_data_len = data_len;
595 /* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */
/* Enqueue `lock` on its resource.  The blocking callback is recorded first
 * and the completion callback only at the very end.  In a non-client
 * namespace the per-type policy callback (if any) runs first and may change
 * the lock's resource or abort the request.  The lock then ends up on the
 * granted, converting or waiting list: a client namespace follows the
 * incoming `*flags`, while the other path decides from the state of the
 * resource's lists and reports the reason for blocking through `*flags`. */
596 ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock * lock,
597 void *cookie, int cookie_len,
599 ldlm_completion_callback completion,
600 ldlm_blocking_callback blocking)
602 struct ldlm_resource *res;
604 ldlm_res_policy policy;
607 res = lock->l_resource;
608 lock->l_blocking_ast = blocking;
/* For extent resources the cookie carries the requested extent. */
610 if (res->lr_type == LDLM_EXTENT)
611 memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent));
613 /* policies are not executed on the client */
614 local = res->lr_namespace->ns_client;
615 if (!local && (policy = ldlm_res_policy_table[res->lr_type])) {
617 rc = policy(lock, cookie, lock->l_req_mode, NULL);
/* The policy may have moved the lock to another resource: reload `res` and
 * tell the caller through LDLM_FL_LOCK_CHANGED. */
619 if (rc == ELDLM_LOCK_CHANGED) {
620 res = lock->l_resource;
621 *flags |= LDLM_FL_LOCK_CHANGED;
622 } else if (rc == ELDLM_LOCK_ABORTED) {
623 ldlm_lock_destroy(lock);
628 lock->l_cookie = cookie;
629 lock->l_cookie_len = cookie_len;
631 if (local && lock->l_req_mode == lock->l_granted_mode) {
632 /* The server returned a blocked lock, but it was granted before
633 * we got a chance to actually enqueue it. We don't need to do
638 /* This distinction between local lock trees is very important; a client
639 * namespace only has information about locks taken by that client, and
640 * thus doesn't have enough information to decide for itself if it can
641 * be granted (below). In this case, we do exactly what the server
642 * tells us to do, as dictated by the 'flags' */
643 ldlm_resource_unlink_lock(lock);
/* Follow the flags: converting list, waiting list, or grant. */
645 if (*flags & LDLM_FL_BLOCK_CONV)
646 ldlm_resource_add_lock(res, res->lr_converting.prev,
648 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
649 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
651 ldlm_grant_lock(lock);
655 /* FIXME: We may want to optimize by checking lr_most_restr */
/* Each of the three checks below parks the lock on the waiting list and
 * records why in *flags: pending conversions, earlier waiters, or a conflict
 * found by ldlm_lock_compat(). */
656 if (!list_empty(&res->lr_converting)) {
657 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
658 *flags |= LDLM_FL_BLOCK_CONV;
661 if (!list_empty(&res->lr_waiting)) {
662 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
663 *flags |= LDLM_FL_BLOCK_WAIT;
/* send_cbs is 0 here: the conflict is only detected, no blocking AST work
 * items are queued by this call. */
666 if (!ldlm_lock_compat(lock, 0)) {
667 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
668 *flags |= LDLM_FL_BLOCK_GRANTED;
672 ldlm_grant_lock(lock);
675 /* Don't set 'completion_ast' until here so that if the lock is granted
676 * immediately we don't do an unnecessary completion call. */
677 lock->l_completion_ast = completion;
681 /* Must be called with namespace taken: queue is waiting or converting. */
/* Walk `queue` and grant each pending lock that ldlm_lock_compat() accepts;
 * send_cbs is 1, so conflicting holders get blocking AST work items queued.
 * What happens on the first lock that is not compatible (presumably the walk
 * stops) and what is returned are not visible in this excerpt. */
682 static int ldlm_reprocess_queue(struct ldlm_resource *res,
683 struct list_head *queue)
685 struct list_head *tmp, *pos;
/* The safe iterator is needed because granted entries are unlinked below. */
688 list_for_each_safe(tmp, pos, queue) {
689 struct ldlm_lock *pending;
690 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
692 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
694 if (!ldlm_lock_compat(pending, 1))
/* Take the lock off this queue before ldlm_grant_lock() links it onto the
 * granted list. */
697 list_del_init(&pending->l_res_link);
698 ldlm_grant_lock(pending);
/* Run and consume every work item on `rpc_list` (items are queued by
 * ldlm_add_ast_work_item()).  For each item either the lock's blocking
 * callback or its completion callback is invoked - the selecting condition is
 * not visible in this excerpt - and a failure is reported through CERROR.
 * Afterwards the lock reference held by the item is dropped, and the item is
 * unlinked and freed. */
704 void ldlm_run_ast_work(struct list_head *rpc_list)
706 struct list_head *tmp, *pos;
/* Safe iteration: each item is deleted inside the loop. */
710 list_for_each_safe(tmp, pos, rpc_list) {
711 struct ldlm_ast_work *w =
712 list_entry(tmp, struct ldlm_ast_work, w_list);
715 rc = w->w_lock->l_blocking_ast
716 (w->w_lock, &w->w_desc, w->w_data,
719 rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags);
721 CERROR("Failed AST - should clean & disconnect "
/* Matches the LDLM_LOCK_GET taken when the item was queued. */
723 LDLM_LOCK_PUT(w->w_lock);
724 list_del(&w->w_list);
725 OBD_FREE(w, sizeof(*w));
730 /* Must be called with resource->lr_lock not taken. */
/* Re-evaluate the converting and waiting lists of `res`.  AST work items
 * produced while granting are collected on a local list through lr_tmp and
 * are run only after the namespace lock has been released. */
731 void ldlm_reprocess_all(struct ldlm_resource *res)
733 struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
736 /* Local lock trees don't get reprocessed. */
737 if (res->lr_namespace->ns_client) {
742 l_lock(&res->lr_namespace->ns_lock);
/* ldlm_add_ast_work_item() appends to whatever lr_tmp points at. */
743 res->lr_tmp = &rpc_list;
/* Conversions go first; waiters are only looked at once no conversion is
 * left pending. */
745 ldlm_reprocess_queue(res, &res->lr_converting);
746 if (list_empty(&res->lr_converting))
747 ldlm_reprocess_queue(res, &res->lr_waiting);
750 l_unlock(&res->lr_namespace->ns_lock);
752 ldlm_run_ast_work(&rpc_list);
/* Cancel `lock`: under the namespace lock, unlink it from its resource list
 * and mark it destroyed.  Remaining readers or writers are only logged here;
 * they do not stop the cancellation in the code visible in this excerpt. */
756 void ldlm_lock_cancel(struct ldlm_lock *lock)
758 struct ldlm_resource *res;
759 struct ldlm_namespace *ns;
762 res = lock->l_resource;
763 ns = res->lr_namespace;
765 l_lock(&ns->ns_lock);
766 if (lock->l_readers || lock->l_writers)
767 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
768 "writers)\n", lock->l_readers, lock->l_writers);
770 ldlm_resource_unlink_lock(lock);
/* ldlm_lock_destroy() takes the namespace lock again itself, so l_lock is
 * relied on to tolerate nested acquisition - NOTE(review): confirm. */
771 ldlm_lock_destroy(lock);
772 l_unlock(&ns->ns_lock);
/* Convert `lock` to `new_mode`.  Under the namespace lock the requested mode
 * is updated and the lock is unlinked from its current list.  In a client
 * namespace the incoming `*flags` decide between parking the lock on the
 * converting list and granting it, and the completion callback, if any, is
 * invoked; the remaining path links the lock onto the converting list.  AST
 * work items collected on the local list are run after the namespace lock
 * has been released.
 * NOTE(review): the branch structure between these steps and the return
 * value are not visible in this excerpt - confirm against the full source. */
776 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
779 struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
780 struct ldlm_resource *res;
781 struct ldlm_namespace *ns;
785 res = lock->l_resource;
786 ns = res->lr_namespace;
788 l_lock(&ns->ns_lock);
790 lock->l_req_mode = new_mode;
791 ldlm_resource_unlink_lock(lock);
793 /* If this is a local resource, put it on the appropriate list. */
794 if (res->lr_namespace->ns_client) {
795 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED))
796 ldlm_resource_add_lock(res, res->lr_converting.prev,
/* ldlm_grant_lock() may queue a completion work item, which needs lr_tmp to
 * point at a list. */
799 res->lr_tmp = &rpc_list;
800 ldlm_grant_lock(lock);
803 /* FIXME: completion handling not with ns_lock held ! */
804 if (lock->l_completion_ast)
805 lock->l_completion_ast(lock, 0);
/* list_add() inserts after the given node; passing the current last entry
 * therefore appends the lock at the tail of the converting list. */
808 list_add(&lock->l_res_link, res->lr_converting.prev);
810 l_unlock(&ns->ns_lock);
813 ldlm_run_ast_work(&rpc_list);
817 void ldlm_lock_dump(struct ldlm_lock *lock)
821 if (!(portal_debug & D_OTHER))
824 if (RES_VERSION_SIZE != 4)
828 CDEBUG(D_OTHER, " NULL LDLM lock\n");
832 snprintf(ver, sizeof(ver), "%x %x %x %x",
833 lock->l_version[0], lock->l_version[1],
834 lock->l_version[2], lock->l_version[3]);
836 CDEBUG(D_OTHER, " -- Lock dump: %p (%s)\n", lock, ver);
837 CDEBUG(D_OTHER, " Parent: %p\n", lock->l_parent);
838 CDEBUG(D_OTHER, " Resource: %p (%Ld)\n", lock->l_resource,
839 lock->l_resource->lr_name[0]);
840 CDEBUG(D_OTHER, " Requested mode: %d, granted mode: %d\n",
841 (int)lock->l_req_mode, (int)lock->l_granted_mode);
842 CDEBUG(D_OTHER, " Readers: %u ; Writers; %u\n",
843 lock->l_readers, lock->l_writers);
844 if (lock->l_resource->lr_type == LDLM_EXTENT)
845 CDEBUG(D_OTHER, " Extent: %Lu -> %Lu\n",
846 (unsigned long long)lock->l_extent.start,
847 (unsigned long long)lock->l_extent.end);