Whamcloud - gitweb
- Add some more verbose logging of the cases that get clients into recovery.
[fs/lustre-release.git] / lustre / ptlrpc / rpc.c
index 742e460..ceefc33 100644 (file)
@@ -76,10 +76,10 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
 
         ENTRY;
 
-        if (cmd != OBD_IOC_RECOVD_NEWCONN)
-                RETURN(0);
+        if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN)
+                RETURN(-EINVAL); /* XXX ENOSYS? */
         
-        /* Find the connection that's been rebuilt. */
+        /* Find the connection that's been rebuilt or has failed. */
         spin_lock(&recovd->recovd_lock);
         list_for_each(tmp, &recovd->recovd_troubled_items) {
                 conn = list_entry(tmp, struct ptlrpc_connection,
@@ -92,10 +92,36 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
                 conn = NULL;
         }
 
-        if (!conn)
-                GOTO(out, rc = -EINVAL);
+        if (!conn) {
+                if (cmd == OBD_IOC_RECOVD_NEWCONN)
+                        GOTO(out, rc = -EINVAL);
+                /* XXX macroize/inline and share with loop above */
+                list_for_each(tmp, &recovd->recovd_managed_items) {
+                        conn = list_entry(tmp, struct ptlrpc_connection,
+                                          c_recovd_data.rd_managed_chain);
+                        
+                        LASSERT(conn->c_recovd_data.rd_recovd == recovd);
+                        
+                        if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+                                break;
+                        conn = NULL;
+                }
+                if (!conn)
+                        GOTO(out, rc = -EINVAL);
+        }
+
+        if (cmd == OBD_IOC_RECOVD_FAILCONN) {
+                spin_unlock(&recovd->recovd_lock);
+                recovd_conn_fail(conn);
+                spin_lock(&recovd->recovd_lock);
+
+                /* Jump straight to the "failed" phase of recovery. */
+                conn->c_recovd_data.rd_phase = RD_FAILED;
+                goto out;
+        }
 
-        if (conn->c_recovd_data.rd_phase != RECOVD_PREPARING)
+        /* else (NEWCONN) */
+        if (conn->c_recovd_data.rd_phase != RD_PREPARING)
                 GOTO(out, rc = -EALREADY);
 
         spin_lock(&conn->c_lock);
@@ -110,7 +136,7 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
         ptlrpc_readdress_connection(conn, conn->c_remote_uuid);
         spin_unlock(&conn->c_lock);
         
-        conn->c_recovd_data.rd_phase = RECOVD_PREPARED;
+        conn->c_recovd_data.rd_phase = RD_PREPARED;
         wake_up(&recovd->recovd_waitq);
  out:
         spin_unlock(&recovd->recovd_lock);