Whamcloud - gitweb
- Use l_wait_event in ldlm_completion_ast to both trigger recovery and make
author shaver <shaver>
Wed, 25 Sep 2002 22:58:09 +0000 (22:58 +0000)
committer shaver <shaver>
Wed, 25 Sep 2002 22:58:09 +0000 (22:58 +0000)
  hangs there due to dead MDSs/OSTs interruptible.

lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_request.c
lustre/mds/mds_reint.c

index 17eb283..d299e03 100644 (file)
@@ -823,8 +823,7 @@ void ldlm_cancel_locks_for_export(struct obd_export *exp)
                 struct ldlm_resource *res;
                 lock = list_entry(iter, struct ldlm_lock, l_export_chain);
                 res = ldlm_resource_getref(lock->l_resource);
-                CDEBUG(D_INFO, "Cancelling lock:");
-                ldlm_lock_dump(lock);
+                LDLM_DEBUG(lock, "cancelling lock for export %p", exp);
                 ldlm_lock_cancel(lock);
                 ldlm_reprocess_all(res);
                 ldlm_resource_put(res);
index ee4594a..02aebe9 100644 (file)
 #include <linux/lustre_dlm.h>
 #include <linux/obd.h>
 
+static int interrupted_completion_wait(void *data)
+{       /* Passed as the third LWI_TIMEOUT_INTR argument in
+         * ldlm_completion_ast; presumably invoked when the wait is
+         * interrupted -- confirm against l_wait_event.  data is unused. */
+        RETURN(1); /* unconditionally reports 1 */
+}
+
+static int expired_completion_wait(void *data)
+{       /* Passed as the second LWI_TIMEOUT_INTR argument in
+         * ldlm_completion_ast, which supplies the lock as data; presumably
+         * run when the obd_timeout * HZ wait expires -- confirm against
+         * l_wait_event. */
+        struct ldlm_lock *lock = data; /* NOTE(review): l_export is
+                                        * dereferenced on the next line with
+                                        * no NULL check -- confirm it is
+                                        * always set for locks that wait */
+        class_signal_connection_failure(lock->l_export->exp_connection);
+        RETURN(0); /* always 0; the signalling call's result is not used */
+}
+
 int ldlm_completion_ast(struct ldlm_lock *lock, int flags)
 {
+        struct l_wait_info lwi = 
+                LWI_TIMEOUT_INTR(obd_timeout * HZ, expired_completion_wait,
+                                 interrupted_completion_wait, lock);
+        int rc = 0;
         ENTRY;
 
         if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
@@ -26,17 +42,26 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags)
                            " sleeping");
                 ldlm_lock_dump(lock);
                 ldlm_reprocess_all(lock->l_resource);
-                wait_event(lock->l_waitq, (lock->l_req_mode ==
-                                           lock->l_granted_mode));
-                LDLM_DEBUG(lock, "client-side enqueue waking up: granted");
+                rc = l_wait_event(lock->l_waitq,
+                                  (lock->l_req_mode == lock->l_granted_mode),
+                                  &lwi);
+                if (rc) {
+                        LDLM_DEBUG(lock,
+                                   "client-side enqueue waking up: failed (%d)",
+                                   rc);
+                } else {
+                        LDLM_DEBUG(lock, 
+                                   "client-side enqueue waking up: granted");
+                }
         } else if (flags == LDLM_FL_WAIT_NOREPROC) {
-                wait_event(lock->l_waitq, (lock->l_req_mode ==
-                                           lock->l_granted_mode));
+                rc = l_wait_event(lock->l_waitq,
+                                  (lock->l_req_mode == lock->l_granted_mode),
+                                  &lwi);
         } else if (flags == 0) {
                 wake_up(&lock->l_waitq);
         }
 
-        RETURN(0);
+        RETURN(rc);
 }
 
 static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
index b0137d9..b8c6a8e 100644 (file)
@@ -801,8 +801,8 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
                                       &flags, ldlm_completion_ast,
                                       mds_blocking_ast, NULL, 0, &oldhandle);
                 if (rc)
-                        CERROR("failed to get child inode lock (child ino "LPD64", "
-                               "dir ino %ld)\n",
+                        CERROR("failed to get child inode lock (child ino "
+                               LPD64" dir ino %ld)\n",
                                res_id[0], de_old->d_inode->i_ino);
         }