Whamcloud - gitweb
b=3772
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
index 8db67c7..6d4af7a 100644 (file)
@@ -105,7 +105,7 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
         rc = USERMODEHELPER(argv[0], argv, envp);
         if (rc < 0) {
                 CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; "
-                       "check /proc/sys/lustre/lustre_upcall\n",
+                       "check /proc/sys/lustre/upcall\n",
                        argv[0], argv[1], argv[2], argv[3], argv[4],rc);
 
         } else {
@@ -151,7 +151,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
 {
         int rc = 0;
         struct list_head *tmp, *pos;
-        struct ptlrpc_request *req;
+        struct ptlrpc_request *req = NULL;
         unsigned long flags;
         __u64 last_transno;
         ENTRY;
@@ -187,16 +187,34 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
          */
         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+
+                /* If need to resend, stop on the matching one first. It's 
+                   possible though it's already been committed, so in that case 
+                   we'll just continue with replay */
+                if (imp->imp_resend_replay && 
+                    req->rq_transno == last_transno) {
+                        lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+                        break;
+                }
+
                 if (req->rq_transno > last_transno) {
-                        rc = ptlrpc_replay_req(req);
-                        if (rc) {
-                                CERROR("recovery replay error %d for req "
-                                       LPD64"\n", rc, req->rq_xid);
-                                RETURN(rc);
-                        }
-                        *inflight = 1;
+                        imp->imp_last_replay_transno = req->rq_transno;
                         break;
                 }
+
+                req = NULL;
+        }
+
+        imp->imp_resend_replay = 0;
+
+        if (req != NULL) {
+                rc = ptlrpc_replay_req(req);
+                if (rc) {
+                        CERROR("recovery replay error %d for req "
+                               LPD64"\n", rc, req->rq_xid);
+                        RETURN(rc);
+                }
+                *inflight = 1;
         }
         RETURN(rc);
 }
@@ -275,8 +293,7 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
         /* Wait for recovery to complete and resend. If evicted, then
            this request will be errored out later.*/
         spin_lock_irqsave(&failed_req->rq_lock, flags);
-        if (!failed_req->rq_no_resend)
-                failed_req->rq_resend = 1;
+        failed_req->rq_resend = 1;
         spin_unlock_irqrestore(&failed_req->rq_lock, flags);
         
         EXIT;