Whamcloud - gitweb
b=17614
author green <green>
Wed, 4 Mar 2009 19:13:48 +0000 (19:13 +0000)
committer green <green>
Wed, 4 Mar 2009 19:13:48 +0000 (19:13 +0000)
r=adilger, shadow

Do not put cancelled locks into replay list, hold references on locks in replay list

lustre/ChangeLog
lustre/ldlm/ldlm_request.c

index fc77420..f1cbfe2 100644 (file)
@@ -839,6 +839,14 @@ Details    : While using HA for Lustre servers with Linux RAID, it is possible
             written. Also while reading the MMP block, we should read it from
             disk and not the cached one.
 
+Severity   : minor
+Frequency  : rare, during recovery
+Bugzilla   : 17895
+Description: Assertion failure in ldlm_lock_put
+Details    : Do not put cancelled locks into replay list, hold references on
+             locks in replay list
+
+
 -------------------------------------------------------------------------------
 
 2008-05-26  Sun Microsystems, Inc.
index b916952..865d968 100644 (file)
@@ -1900,9 +1900,15 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
         /* we use l_pending_chain here, because it's unused on clients. */
         LASSERTF(list_empty(&lock->l_pending_chain),"lock %p next %p prev %p\n",
                  lock, &lock->l_pending_chain.next,&lock->l_pending_chain.prev);
-        /* bug 9573: don't replay locks left after eviction */
-        if (!(lock->l_flags & LDLM_FL_FAILED))
+        /* bug 9573: don't replay locks left after eviction, or
+         * bug 17614: locks being actively cancelled. Get a reference
+         * on a lock so that it does not disappear under us (e.g. due to cancel)
+         */
+        if (!(lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_CANCELING))) {
                 list_add(&lock->l_pending_chain, list);
+                LDLM_LOCK_GET(lock);
+        }
+
         return LDLM_ITER_CONTINUE;
 }
 
@@ -2066,9 +2072,12 @@ int ldlm_replay_locks(struct obd_import *imp)
                                              &list);
                 list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
                         list_del_init(&lock->l_pending_chain);
-                        if (rc)
+                        if (rc) {
+                                LDLM_LOCK_PUT(lock);
                                 continue; /* or try to do the rest? */
+                        }
                         rc = replay_one_lock(imp, lock);
+                        LDLM_LOCK_PUT(lock);
                 }
         }
         atomic_dec(&imp->imp_replay_inflight);