Whamcloud - gitweb
Landing b_recovery
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
index ed969fe..a569ab7 100644 (file)
@@ -50,13 +50,13 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
         char *argv[4];
         char *envp[3];
         int rc;
-
         ENTRY;
+
         argv[0] = obd_lustre_upcall;
         argv[1] = "RECOVERY_OVER";
         argv[2] = obd->obd_uuid.uuid;
         argv[3] = NULL;
-
+        
         envp[0] = "HOME=/";
         envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
         envp[2] = NULL;
@@ -68,7 +68,7 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
                        argv[0], argv[1], argv[2], rc);
 
         } else {
-                CERROR("Invoked upcall %s %s %s",
+                CERROR("Invoked upcall %s %s %s\n",
                        argv[0], argv[1], argv[2]);
         }
 }
@@ -76,11 +76,20 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
 void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
 {
 #ifdef __KERNEL__
+        unsigned long flags;
         char *argv[7];
         char *envp[3];
         int rc;
-
         ENTRY;
+
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+                EXIT;
+                return;
+        }
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+        
         argv[0] = obd_lustre_upcall;
         argv[1] = "FAILED_IMPORT";
         argv[2] = imp->imp_target_uuid.uuid;
@@ -108,12 +117,14 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
 #endif
 }
 
-int ptlrpc_replay(struct obd_import *imp)
+int ptlrpc_replay_next(struct obd_import *imp)
 {
         int rc = 0;
         struct list_head *tmp, *pos;
         struct ptlrpc_request *req;
         unsigned long flags;
+        __u64 last_transno;
+        int sent_req = 0;
         ENTRY;
 
         /* It might have committed some after we last spoke, so make sure we
@@ -121,16 +132,11 @@ int ptlrpc_replay(struct obd_import *imp)
          */
         spin_lock_irqsave(&imp->imp_lock, flags);
         ptlrpc_free_committed(imp);
+        last_transno = imp->imp_last_replay_transno;
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
                imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno);
-
-        list_for_each(tmp, &imp->imp_replay_list) {
-                req = list_entry(tmp, struct ptlrpc_request, rq_list);
-                DEBUG_REQ(D_HA, req, "RETAINED: ");
-        }
-
         /* Do I need to hold a lock across this iteration?  We shouldn't be
          * racing with any additions to the list, because we're in recovery
          * and are therefore not processing additional requests to add.  Calls
@@ -147,20 +153,27 @@ int ptlrpc_replay(struct obd_import *imp)
          * just a little race...
          */
         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
-                req = list_entry(tmp, struct ptlrpc_request, rq_list);
-
-                DEBUG_REQ(D_HA, req, "REPLAY:");
-
-                rc = ptlrpc_replay_req(req);
-
-                if (rc) {
-                        CERROR("recovery replay error %d for req "LPD64"\n",
-                               rc, req->rq_xid);
-                        RETURN(rc);
+                req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+                if (req->rq_transno > last_transno) {
+                        /* Remove from the list so ptlrpcd can send the
+                           req; it should be reinserted after it is
+                           sent and replied to.  Perhaps a better solution
+                           would be to add req->rq_replay_list so the
+                           req can be saved for replay and still go
+                           through the normal send thread. */
+                        rc = ptlrpc_replay_req(req);
+                        if (rc) {
+                                CERROR("recovery replay error %d for req "LPD64"\n",
+                                       rc, req->rq_xid);
+                                RETURN(rc);
+                        }
+                        sent_req = 1;
+                        break;
                 }
+
         }
 
-        RETURN(0);
+        RETURN(sent_req);
 }
 
 int ptlrpc_resend(struct obd_import *imp)
@@ -199,10 +212,6 @@ void ptlrpc_wake_delayed(struct obd_import *imp)
         list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
 
-                ptlrpc_put_connection(req->rq_connection);
-                req->rq_connection =
-                       ptlrpc_connection_addref(req->rq_import->imp_connection);
-
                 if (req->rq_set) {
                         DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
                         wake_up(&req->rq_set->set_waitq);
@@ -225,12 +234,13 @@ inline void ptlrpc_invalidate_import_state(struct obd_import *imp)
         obd_invalidate_import(obd, imp);
 #endif
 
-        ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY | LDLM_FL_CANCEL);
+        ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
 }
 
 void ptlrpc_handle_failed_import(struct obd_import *imp)
 {
         ENTRY;
+
         if (!imp->imp_replayable) {
                 CDEBUG(D_HA,
                        "import %s@%s for %s not replayable, deactivating\n",
@@ -255,20 +265,18 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
                imp->imp_obd->obd_name,
                imp->imp_target_uuid.uuid,
                imp->imp_connection->c_remote_uuid.uuid);
+        
+        ptlrpc_set_import_discon(imp);
 
-        rc = ptlrpc_recover_import_no_retry(imp, NULL);
-
-        if (failed_req->rq_import_generation != imp->imp_generation) {
-                spin_lock_irqsave (&failed_req->rq_lock, flags);
-                failed_req->rq_err = 1;
-                spin_unlock_irqrestore (&failed_req->rq_lock, flags);
-        }
-        else {
-                ptlrpc_resend_req(failed_req);
-                if (rc && rc != -EALREADY)
-                        ptlrpc_handle_failed_import(imp);
-                        
-        }
+        rc = ptlrpc_connect_import(imp, NULL);
+        
+        /* Wait for recovery to complete and resend.  If evicted, then
+           this request will be errored out later. */
+        spin_lock_irqsave(&failed_req->rq_lock, flags);
+        if (!failed_req->rq_no_resend)
+                failed_req->rq_resend = 1;
+        spin_unlock_irqrestore(&failed_req->rq_lock, flags);
+        
         EXIT;
 }
 
@@ -311,222 +319,66 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
         RETURN(0);
 }
 
-void ptlrpc_fail_import(struct obd_import *imp, int generation)
-{
-        unsigned long flags;
-        int in_recovery = 0;
-        ENTRY;
-
-        LASSERT (!imp->imp_dlm_fake);
-
-        spin_lock_irqsave(&imp->imp_lock, flags);
-        if (imp->imp_state != LUSTRE_IMP_FULL) {
-                in_recovery = 1;
-        } else {
-                CDEBUG(D_HA, "%s: new state: DISCON\n", 
-                       imp->imp_client->cli_name);
-                imp->imp_state = LUSTRE_IMP_DISCON;
-        }
-        spin_unlock_irqrestore(&imp->imp_lock, flags);
-
-        if (in_recovery) {
-                EXIT;
-                return;
-        }
-
-        ptlrpc_handle_failed_import(imp);
-        EXIT;
-}
-
-static int signal_completed_replay(struct obd_import *imp)
-{
-        struct ptlrpc_request *req;
-        int rc;
-        ENTRY;
-
-        req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
-        if (!req)
-                RETURN(-ENOMEM);
-
-        req->rq_replen = lustre_msg_size(0, NULL);
-        req->rq_send_state = LUSTRE_IMP_REPLAY;
-        req->rq_reqmsg->flags |= MSG_LAST_REPLAY;
-        req->rq_timeout *= 3; 
-
-        rc = ptlrpc_queue_wait(req);
-
-        ptlrpc_req_finished(req);
-        RETURN(rc);
-}
-
 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
 {
         int rc;
         ENTRY;
         
+        /* force import to be disconnected. */
+        ptlrpc_set_import_discon(imp);
+        
         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
 
-        if (rc && rc != -EALREADY) {
-                unsigned long flags;
-                CDEBUG(D_HA, "recovery of %s on %s failed (%d); restarting\n",
-                       imp->imp_target_uuid.uuid,
-                       new_uuid ? new_uuid :
-                       (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
-                spin_lock_irqsave(&imp->imp_lock, flags);
-                imp->imp_state = LUSTRE_IMP_FULL;
-                spin_unlock_irqrestore(&imp->imp_lock, flags);
-                ptlrpc_fail_import(imp, imp->imp_generation);
-        }
         RETURN(rc);
 }
 
+int ptlrpc_import_in_recovery(struct obd_import *imp)
+{
+        unsigned long flags;
+        int in_recovery = 1;
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        if (imp->imp_state == LUSTRE_IMP_FULL ||
+            imp->imp_state == LUSTRE_IMP_CLOSED ||
+            imp->imp_state == LUSTRE_IMP_DISCON)
+                in_recovery = 0;
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+        return in_recovery;
+}
+
 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
                                           char *new_uuid)
 {
         int rc;
         unsigned long flags;
         int in_recovery = 0;
-        int was_invalid = 0;
+        struct l_wait_info lwi;
         ENTRY;
 
         spin_lock_irqsave(&imp->imp_lock, flags);
-        if (imp->imp_state == LUSTRE_IMP_FULL) {
-                CDEBUG(D_HA, "%s: new state: DISCON\n", 
-                       imp->imp_client->cli_name);
-                imp->imp_state = LUSTRE_IMP_DISCON;
-        } 
-        
         if (imp->imp_state != LUSTRE_IMP_DISCON) {
                 in_recovery = 1;
-        } else if (imp->imp_invalid) {
-                imp->imp_invalid = 0;
-                was_invalid = 1;
-        }
-
+        } 
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         if (in_recovery == 1)
                 RETURN(-EALREADY);
 
-        down(&imp->imp_recovery_sem);
-        /* If recovery happened while we waited, we're done. */
-        if (imp->imp_state == LUSTRE_IMP_FULL)
-                GOTO(out, rc = 0);
-
-        LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
-
-        if (new_uuid) {
-                struct ptlrpc_connection *conn;
-                struct obd_uuid uuid;
-                struct ptlrpc_peer peer;
-                struct obd_export *dlmexp;
-
-                obd_str2uuid(&uuid, new_uuid);
-                if (ptlrpc_uuid_to_peer(&uuid, &peer)) {
-                        CERROR("no connection found for UUID %s\n", new_uuid);
-                        GOTO(out, rc = -EINVAL);
-                }
-
-                conn = ptlrpc_get_connection(&peer, &uuid);
-                if (!conn)
-                        GOTO(out, rc = -ENOMEM);
-
-                CDEBUG(D_HA, "switching import %s/%s from %s to %s\n",
-                       imp->imp_target_uuid.uuid, imp->imp_obd->obd_name,
-                       imp->imp_connection->c_remote_uuid.uuid,
-                       conn->c_remote_uuid.uuid);
-
-                /* Switch the import's connection and the DLM export's
-                 * connection (which are almost certainly the same, but we
-                 * keep distinct refs just to make things clearer. I think. */
-                if (imp->imp_connection)
-                        ptlrpc_put_connection(imp->imp_connection);
-                /* We hand off the ref from ptlrpc_get_connection. */
-                imp->imp_connection = conn;
-
-                dlmexp = class_conn2export(&imp->imp_dlm_handle);
-                if (dlmexp->exp_connection)
-                        ptlrpc_put_connection(dlmexp->exp_connection);
-                dlmexp->exp_connection = ptlrpc_connection_addref(conn);
-                class_export_put(dlmexp);
-
-        }
-
- connect:
-        rc = ptlrpc_connect_import(imp);
-
-        if (rc < 0) {
-                CERROR("failed to reconnect to %s@%s: %d\n",
-                       imp->imp_target_uuid.uuid,
-                       imp->imp_connection->c_remote_uuid.uuid, rc);
-                GOTO(out, rc);
-        } 
-
-        if (imp->imp_state == LUSTRE_IMP_EVICTED) {
-                CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
-                       imp->imp_target_uuid.uuid,
-                       imp->imp_connection->c_remote_uuid.uuid);
-                ptlrpc_set_import_active(imp, 0);
-                CDEBUG(D_HA, "%s: new state: RECOVER\n", 
-                       imp->imp_client->cli_name);
-                imp->imp_state = LUSTRE_IMP_RECOVER;
-        } 
         
-        if (imp->imp_state == LUSTRE_IMP_REPLAY) {
-                CDEBUG(D_HA, "replay requested by %s\n",
-                       imp->imp_target_uuid.uuid);
-                rc = ptlrpc_replay(imp);
-                if (rc)
-                        GOTO(out, rc);
-
-                rc = ldlm_replay_locks(imp);
-                if (rc)
-                        GOTO(out, rc);
-
-                rc = signal_completed_replay(imp);
-                if (rc)
-                        GOTO(out, rc);
-                CDEBUG(D_HA, "%s: new state: RECOVER\n", 
-                       imp->imp_client->cli_name);
-                imp->imp_state = LUSTRE_IMP_RECOVER;
-        } 
-
-        if (imp->imp_state == LUSTRE_IMP_RECOVER) {
-                CDEBUG(D_HA, "reconnected to %s@%s\n",
-                       imp->imp_target_uuid.uuid,
-                       imp->imp_connection->c_remote_uuid.uuid);
-
-                ptlrpc_set_import_active(imp, 1);
-                ptlrpc_resend(imp);
-                spin_lock_irqsave(&imp->imp_lock, flags);
-                CDEBUG(D_HA, "%s: new state: FULL\n", 
-                       imp->imp_client->cli_name);
-                imp->imp_state = LUSTRE_IMP_FULL;
-                spin_unlock_irqrestore(&imp->imp_lock, flags);
-                ptlrpc_wake_delayed(imp);
-        } 
+        rc = ptlrpc_connect_import(imp, new_uuid);
+        if (rc)
+                RETURN(rc);
 
+        CDEBUG(D_ERROR, "%s: recovery started, waiting\n", 
+               imp->imp_client->cli_name);
 
-        LASSERT(imp->imp_state == LUSTRE_IMP_FULL);
+        lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
+        rc = l_wait_event(imp->imp_recovery_waitq, 
+                          !ptlrpc_import_in_recovery(imp), &lwi);
+        CDEBUG(D_ERROR, "%s: recovery finished\n", 
+               imp->imp_client->cli_name);
 
- out:
-        if (rc != 0) {
-                spin_lock_irqsave(&imp->imp_lock, flags);
-                imp->imp_state = LUSTRE_IMP_DISCON;
-                spin_unlock_irqrestore(&imp->imp_lock, flags);
-                
-                if (rc == -ENOTCONN) {
-                        CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;"
-                               "invalidating and reconnecting\n",
-                               imp->imp_target_uuid.uuid,
-                               imp->imp_connection->c_remote_uuid.uuid);
-                        GOTO(connect, -ENOTCONN);
-                } else if (was_invalid) {
-                        ptlrpc_set_import_active(imp, 0);
-                }
-        }
-        up(&imp->imp_recovery_sem);
         RETURN(rc);
+        
 }
 
 void ptlrpc_fail_export(struct obd_export *exp)