From: jxiong
Date: Fri, 10 Apr 2009 04:04:16 +0000 (+0000)
Subject: b=19016
X-Git-Tag: v1_9_167~13
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=38ea879fa74d9c89d2c8ece14c860be6a8f3235e;p=fs%2Flustre-release.git

b=19016
r=wangdi,fan.yong

Fix an issue about enqueuing a lock in clio.
---

diff --git a/lustre/lov/lovsub_lock.c b/lustre/lov/lovsub_lock.c
index 6b96896..162033d 100644
--- a/lustre/lov/lovsub_lock.c
+++ b/lustre/lov/lovsub_lock.c
@@ -86,12 +86,13 @@ static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
         EXIT;
 }
 
-static void lovsub_lock_state_one(const struct lu_env *env,
-                                  const struct lovsub_lock *lovsub,
-                                  struct lov_lock *lov)
+static int lovsub_lock_state_one(const struct lu_env *env,
+                                 const struct lovsub_lock *lovsub,
+                                 struct lov_lock *lov)
 {
-        struct cl_lock *parent;
-        const struct cl_lock *child;
+        struct cl_lock *parent;
+        struct cl_lock *child;
+        int restart = 0;
 
         ENTRY;
         parent = lov->lls_cl.cls_lock;
@@ -99,13 +100,24 @@ static void lovsub_lock_state_one(const struct lu_env *env,
 
         if (lovsub->lss_active != parent) {
                 lovsub_parent_lock(env, lov);
-                if (child->cll_error != 0)
+                if (child->cll_error != 0 && parent->cll_error == 0) {
+                        /*
+                         * This is a deadlock case:
+                         * cl_lock_error(for the parent lock)
+                         *   -> cl_lock_delete
+                         *     -> lov_lock_delete
+                         *       -> cl_lock_enclosure
+                         *         -> cl_lock_mutex_try(for the child lock)
+                         */
+                        cl_lock_mutex_put(env, child);
                         cl_lock_error(env, parent, child->cll_error);
-                else
+                        restart = 1;
+                } else {
                         cl_lock_signal(env, parent);
+                }
                 lovsub_parent_unlock(env, lov);
         }
-        EXIT;
+        RETURN(restart);
 }
 
 /**
@@ -119,23 +131,22 @@ static void lovsub_lock_state(const struct lu_env *env,
 {
         struct lovsub_lock   *sub = cl2lovsub_lock(slice);
         struct lov_lock_link *scan;
-        struct lov_lock_link *temp;
+        int restart = 0;
 
         LASSERT(cl_lock_is_mutexed(slice->cls_lock));
         ENTRY;
 
-        /*
-         * Use _safe() version, because
-         *
-         *     lovsub_lock_state_one()
-         *       ->cl_lock_error()
-         *         ->cl_lock_delete()
-         *           ->lov_lock_delete()
-         *
-         * can unlink parent from the parent list.
-         */
-        list_for_each_entry_safe(scan, temp, &sub->lss_parents, lll_list)
-                lovsub_lock_state_one(env, sub, scan->lll_super);
+        do {
+                restart = 0;
+                list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+                        restart = lovsub_lock_state_one(env, sub,
+                                                        scan->lll_super);
+                        if (restart) {
+                                cl_lock_mutex_get(env, slice->cls_lock);
+                                break;
+                        }
+                }
+        } while(restart);
         EXIT;
 }
 
diff --git a/lustre/obdclass/cl_lock.c b/lustre/obdclass/cl_lock.c
index bd935a6..b75685f 100644
--- a/lustre/obdclass/cl_lock.c
+++ b/lustre/obdclass/cl_lock.c
@@ -1600,10 +1600,6 @@ EXPORT_SYMBOL(cl_lock_delete);
  * time-out happens.
  *
  * \pre atomic_read(&lock->cll_ref) > 0
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- *           cl_lock_nr_mutexed(env) == 1)
- *      [i.e., if a top-lock failed, mutices of no other locks can be held, as
- *      failing sub-locks might require releasing a top-lock mutex]
  *
  * \see clo_lock_delete()
  * \see cl_lock::cll_holds
@@ -1632,12 +1628,6 @@ EXPORT_SYMBOL(cl_lock_error);
  *
  * Cancellation notification is delivered to layers at most once.
  *
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- *           cl_lock_nr_mutexed(env) == 1)
- *      [i.e., if a top-lock is canceled, mutices of no other locks can be
- *      held, as cancellation of sub-locks might require releasing a top-lock
- *      mutex]
- *
  * \see cl_lock_operations::clo_cancel()
  * \see cl_lock::cll_holds
  */
@@ -1645,8 +1635,6 @@ void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
 {
         LINVRNT(cl_lock_is_mutexed(lock));
         LINVRNT(cl_lock_invariant(env, lock));
-        LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
-                     cl_lock_nr_mutexed(env) == 1));
 
         ENTRY;
         if (lock->cll_holds == 0)