From 904493207e30da9730f497b8462221cfef0715c3 Mon Sep 17 00:00:00 2001 From: green Date: Wed, 4 Mar 2009 19:14:07 +0000 Subject: [PATCH] b=17614 r=adilger, shadow Do not put cancelled locks into replay list, hold references on locks in replay list --- lustre/ChangeLog | 8 ++++++++ lustre/ldlm/ldlm_request.c | 15 ++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index b1b5ab8..6f50845 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -111,6 +111,14 @@ Details : While using HA for Lustre servers with Linux RAID, it is possible written. Also while reading the MMP block, we should read it from disk and not the cached one. +Severity : minor +Frequency : rare, during recovery +Bugzilla : 17895 +Description: Assertion failure in ldlm_lock_put +Details : Do not put cancelled locks into replay list, hold references on + locks in replay list + + ------------------------------------------------------------------------------- 2008-12-31 Sun Microsystems, Inc. * version 1.8.0 diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index cafd530..cd85edd 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -1801,9 +1801,15 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure) /* we use l_pending_chain here, because it's unused on clients. */ LASSERTF(list_empty(&lock->l_pending_chain),"lock %p next %p prev %p\n", lock, &lock->l_pending_chain.next,&lock->l_pending_chain.prev); - /* bug 9573: don't replay locks left after eviction */ - if (!(lock->l_flags & LDLM_FL_FAILED)) + /* bug 9573: don't replay locks left after eviction, or + * bug 17614: locks being actively cancelled. Get a reference + * on a lock so that it does not disappear under us (e.g. 
due to cancel) + */ + if (!(lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_CANCELING))) { list_add(&lock->l_pending_chain, list); + LDLM_LOCK_GET(lock); + } + return LDLM_ITER_CONTINUE; } @@ -1968,9 +1974,12 @@ int ldlm_replay_locks(struct obd_import *imp) &list); list_for_each_entry_safe(lock, next, &list, l_pending_chain) { list_del_init(&lock->l_pending_chain); - if (rc) + if (rc) { + LDLM_LOCK_PUT(lock); continue; /* or try to do the rest? */ + } rc = replay_one_lock(imp, lock); + LDLM_LOCK_PUT(lock); } } atomic_dec(&imp->imp_replay_inflight); -- 1.8.3.1