Whamcloud - gitweb
b=19016
author jxiong <jxiong>
Fri, 10 Apr 2009 04:04:16 +0000 (04:04 +0000)
committer jxiong <jxiong>
Fri, 10 Apr 2009 04:04:16 +0000 (04:04 +0000)
r=wangdi,fan.yong

Fix an issue about enqueuing a lock in clio.

lustre/lov/lovsub_lock.c
lustre/obdclass/cl_lock.c

index 6b96896..162033d 100644 (file)
@@ -86,12 +86,13 @@ static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
         EXIT;
 }
 
-static void lovsub_lock_state_one(const struct lu_env *env,
-                                  const struct lovsub_lock *lovsub,
-                                  struct lov_lock *lov)
+static int lovsub_lock_state_one(const struct lu_env *env,
+                                 const struct lovsub_lock *lovsub,
+                                 struct lov_lock *lov)
 {
-        struct cl_lock       *parent;
-        const struct cl_lock *child;
+        struct cl_lock *parent;
+        struct cl_lock *child;
+        int             restart = 0;
 
         ENTRY;
         parent = lov->lls_cl.cls_lock;
@@ -99,13 +100,24 @@ static void lovsub_lock_state_one(const struct lu_env *env,
 
         if (lovsub->lss_active != parent) {
                 lovsub_parent_lock(env, lov);
-                if (child->cll_error != 0)
+                if (child->cll_error != 0 && parent->cll_error == 0) {
+                        /*
+                         * This is a deadlock case:
+                         * cl_lock_error(for the parent lock)
+                         *   -> cl_lock_delete
+                         *     -> lov_lock_delete
+                         *       -> cl_lock_enclosure
+                         *         -> cl_lock_mutex_try(for the child lock)
+                         */
+                        cl_lock_mutex_put(env, child);
                         cl_lock_error(env, parent, child->cll_error);
-                else
+                        restart = 1;
+                } else {
                         cl_lock_signal(env, parent);
+                }
                 lovsub_parent_unlock(env, lov);
         }
-        EXIT;
+        RETURN(restart);
 }
 
 /**
@@ -119,23 +131,22 @@ static void lovsub_lock_state(const struct lu_env *env,
 {
         struct lovsub_lock   *sub = cl2lovsub_lock(slice);
         struct lov_lock_link *scan;
-        struct lov_lock_link *temp;
+        int                   restart = 0;
 
         LASSERT(cl_lock_is_mutexed(slice->cls_lock));
         ENTRY;
 
-        /*
-         * Use _safe() version, because
-         *
-         *     lovsub_lock_state_one()
-         *       ->cl_lock_error()
-         *         ->cl_lock_delete()
-         *           ->lov_lock_delete()
-         *
-         * can unlink parent from the parent list.
-         */
-        list_for_each_entry_safe(scan, temp, &sub->lss_parents, lll_list)
-                lovsub_lock_state_one(env, sub, scan->lll_super);
+        do {
+                restart = 0;
+                list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+                        restart = lovsub_lock_state_one(env, sub,
+                                                        scan->lll_super);
+                        if (restart) {
+                                cl_lock_mutex_get(env, slice->cls_lock);
+                                break;
+                        }
+                }
+        } while(restart);
         EXIT;
 }
 
index bd935a6..b75685f 100644 (file)
@@ -1600,10 +1600,6 @@ EXPORT_SYMBOL(cl_lock_delete);
  * time-out happens.
  *
  * \pre atomic_read(&lock->cll_ref) > 0
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- *           cl_lock_nr_mutexed(env) == 1)
- *      [i.e., if a top-lock failed, mutices of no other locks can be held, as
- *      failing sub-locks might require releasing a top-lock mutex]
  *
  * \see clo_lock_delete()
  * \see cl_lock::cll_holds
@@ -1632,12 +1628,6 @@ EXPORT_SYMBOL(cl_lock_error);
  *
  * Cancellation notification is delivered to layers at most once.
  *
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- *           cl_lock_nr_mutexed(env) == 1)
- *      [i.e., if a top-lock is canceled, mutices of no other locks can be
- *      held, as cancellation of sub-locks might require releasing a top-lock
- *      mutex]
- *
  * \see cl_lock_operations::clo_cancel()
  * \see cl_lock::cll_holds
  */
@@ -1645,8 +1635,6 @@ void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
 {
         LINVRNT(cl_lock_is_mutexed(lock));
         LINVRNT(cl_lock_invariant(env, lock));
-        LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
-                     cl_lock_nr_mutexed(env) == 1));
 
         ENTRY;
         if (lock->cll_holds == 0)