From 90bf6ce00001025fdecae7e82aab33726aaa73e0 Mon Sep 17 00:00:00 2001 From: adilger Date: Tue, 29 Mar 2005 13:03:33 +0000 Subject: [PATCH] Branch: b1_4 Don't re-add a lock being destroyed to the waiting locks list. Debugging (that doesn't panic system) in case this happens again. b=5653 r=phil --- lustre/ChangeLog | 1 + lustre/ldlm/ldlm_lock.c | 4 ++-- lustre/ldlm/ldlm_lockd.c | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index b8b68ed..1121eb6 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -7,6 +7,7 @@ - swab llog records of type '0' so we get proper header size/idx (5861) - send llog cancel req to DLM cancel portal instead of cb portal (5515) - avoid SetPageDirty on 2.6 (5981) + - don't re-add just-being-destroyed locks to the waiting list (5653) * miscellania - by default create 1 inode per 4kB space on MDS, per 16kB on OSTs - allow --write-conf on an MDS with different nettype than client (5619) diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index be4bf58..9e57724 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -684,7 +684,7 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct l_wait_info lwi; if (lock->l_completion_ast) { int err = lock->l_completion_ast(lock, - LDLM_FL_WAIT_NOREPROC, + LDLM_FL_WAIT_NOREPROC, NULL); if (err) { rc = 0; @@ -708,7 +708,7 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, type == LDLM_PLAIN ? res_id->name[3] : policy->l_extent.end); l_unlock(&ns->ns_lock); - } else if (!(flags & LDLM_FL_TEST_LOCK)) { /*less verbose for test-only*/ + } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/ LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res " LPU64"/"LPU64" ("LPU64" "LPU64")", ns, type, mode, res_id->name[0], res_id->name[1], diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index f5616ad..eb4565d 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -66,6 +66,7 @@ static struct timer_list waiting_locks_timer; static struct expired_lock_thread { wait_queue_head_t elt_waitq; int elt_state; + int elt_dump; struct list_head elt_expired_locks; } expired_lock_thread; #endif @@ -129,6 +130,18 @@ static int expired_lock_main(void *arg) &lwi); spin_lock_bh(&waiting_locks_spinlock); + if (expired_lock_thread.elt_dump) { + spin_unlock_bh(&waiting_locks_spinlock); + + /* from waiting_locks_callback, but not in timer */ + portals_debug_dumplog(); + portals_run_lbug_upcall(__FILE__, "waiting_locks_cb", + expired_lock_thread.elt_dump); + + spin_lock_bh(&waiting_locks_spinlock); + expired_lock_thread.elt_dump = 0; + } + while (!list_empty(expired)) { struct obd_export *export; struct ldlm_lock *lock; @@ -168,6 +181,7 @@ static int expired_lock_main(void *arg) RETURN(0); } +/* This is called from within a timer interrupt and cannot schedule */ static void waiting_locks_callback(unsigned long unused) { struct ldlm_lock *lock, *last = NULL; @@ -194,8 +208,17 @@ static void waiting_locks_callback(unsigned long unused) &lock->l_pending_chain, lock->l_pending_chain.next, lock->l_pending_chain.prev); + + INIT_LIST_HEAD(&waiting_locks_list); /* HACK */ + expired_lock_thread.elt_dump = __LINE__; spin_unlock_bh(&waiting_locks_spinlock); - LBUG(); + + /* LBUG(); */ + CEMERG("would be an LBUG, but isn't (bug 5653)\n"); + portals_debug_dumpstack(NULL); + /*blocks* portals_debug_dumplog(); */ + /*blocks* portals_run_lbug_upcall(file, func, line); */ + break; } last = lock; @@ -235,6 +258,17 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) l_check_ns_lock(lock->l_resource->lr_namespace); spin_lock_bh(&waiting_locks_spinlock); + if (lock->l_destroyed) { + static unsigned long next; + spin_unlock_bh(&waiting_locks_spinlock); + LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)"); + if (time_after(jiffies, next)) { + next = jiffies + 14400 * HZ; + portals_debug_dumpstack(NULL); + } + return 0; + } + if (!list_empty(&lock->l_pending_chain)) { spin_unlock_bh(&waiting_locks_spinlock); LDLM_DEBUG(lock, "not re-adding to wait list"); -- 1.8.3.1